diff --git a/cypress/integration/origin-save.spec.js b/cypress/integration/origin-save.spec.js
index a67bd07b..86016773 100644
--- a/cypress/integration/origin-save.spec.js
+++ b/cypress/integration/origin-save.spec.js
@@ -1,346 +1,346 @@
/**
* Copyright (C) 2019-2020 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
// Shared state of the suite, assigned in its `before` hook.
let url;
let origin;
// jQuery as exposed by Cypress, used for synchronous reads of DOM nodes.
const $ = Cypress.$;
// Texts expected in the alerts displayed after submitting a save request.
// They are checked with a `contain` assertion (see checkAlertVisible), so
// an entry only needs to be a substring of the displayed alert.
const saveCodeMsg = {
'success': 'The "save code now" request has been accepted and will be processed as soon as possible.',
'warning': 'The "save code now" request has been put in pending state and may be accepted for processing after manual review.',
'rejected': 'The "save code now" request has been rejected because the provided origin url is blacklisted.',
'rateLimit': 'The rate limit for "save code now" requests has been reached. Please try again later.',
'unknownError': 'An unexpected error happened when submitting the "save code now request',
'csrfError': 'CSRF Failed: Referrer checking failed - no Referrer.'
};
/**
 * Fill the "save code now" form and submit it.
 *
 * @param {string} originType - visit type to pick in the select input
 * @param {string} originUrl - origin url typed in the text input
 */
function makeOriginSaveRequest(originType, originUrl) {
  cy.get('#swh-input-origin-url').type(originUrl);
  cy.get('#swh-input-visit-type').select(originType);
  cy.get('#swh-save-origin-form').submit();
}
/**
 * Assert that the save request status area displays a visible alert of
 * the given type containing the given text.
 *
 * @param {string} alertType - suffix of the alert CSS class (`alert-<type>`)
 * @param {string} msg - text the alert must contain
 */
function checkAlertVisible(alertType, msg) {
  const alertSelector = `.alert-${alertType}`;
  cy.get('#swh-origin-save-request-status').should('be.visible')
    .find(alertSelector).should('be.visible').and('contain', msg);
}
/**
 * Stub the POST request creating a save request and alias it as
 * `@saveRequest`.
 *
 * When the response status is not 200 and an error message is provided,
 * the stubbed body is `{detail: errorMessage}`; otherwise it is the payload
 * built by genOriginSaveResponse from the other options.
 */
function stubSaveRequest({
  requestUrl,
  visitType = 'git',
  saveRequestStatus,
  originUrl,
  saveTaskStatus,
  responseStatus = 200,
  errorMessage = '',
  saveRequestDate = new Date(),
  visitDate = new Date()
} = {}) {
  const isErrorReply = responseStatus !== 200 && Boolean(errorMessage);
  const response = isErrorReply
    ? {'detail': errorMessage}
    : genOriginSaveResponse({
      visitType,
      saveRequestStatus,
      originUrl,
      saveRequestDate,
      saveTaskStatus,
      visitDate
    });

  const stubbedRoute = {
    method: 'POST',
    status: responseStatus,
    url: requestUrl,
    response
  };
  cy.route(stubbedRoute).as('saveRequest');
}
/**
 * Build a mocked API response for /save/(:visit_type)/(:origin_url),
 * visit_type being one of 'git', 'hg' or 'svn'.
 *
 * Dates are serialized to ISO strings; a falsy date is serialized as null.
 */
function genOriginSaveResponse({
  visitType = 'git',
  saveRequestStatus,
  originUrl,
  saveRequestDate = new Date(),
  saveTaskStatus,
  visitDate = new Date()
} = {}) {
  const toIsoOrNull = (date) => (date ? date.toISOString() : null);

  const payload = {};
  payload['visit_type'] = visitType;
  payload['save_request_status'] = saveRequestStatus;
  payload['origin_url'] = originUrl;
  payload['id'] = 1;
  payload['save_request_date'] = toIsoOrNull(saveRequestDate);
  payload['save_task_status'] = saveTaskStatus;
  payload['visit_date'] = toIsoOrNull(visitDate);
  return payload;
}
describe('Origin Save Tests', function() {
// Run once: resolve the page url, the origin under test and the url of
// the save request endpoint for that origin.
before(function() {
url = this.Urls.origin_save();
origin = this.origin[0];
this.originSaveUrl = this.Urls.origin_save_request(origin.type, origin.url);
});
// Run before each test: alias the JSON fixtures, open the save page and
// start the Cypress server so that the routes declared by the tests are
// stubbed.
beforeEach(function() {
cy.fixture('origin-save').as('originSaveJSON');
cy.fixture('save-task-info').as('saveTaskInfoJSON');
cy.visit(url);
cy.server();
});
// An accepted save request must be reported with a success alert.
it('should display accepted message when accepted', function() {
  const acceptedRequest = {
    requestUrl: this.originSaveUrl,
    saveRequestStatus: 'accepted',
    originUrl: origin.url,
    saveTaskStatus: 'not yet scheduled'
  };
  stubSaveRequest(acceptedRequest);

  makeOriginSaveRequest(origin.type, origin.url);

  cy.wait('@saveRequest').then(() => {
    checkAlertVisible('success', saveCodeMsg.success);
  });
});
// The url of a GitLab project nested below another project path must pass
// the form validation and lead to an accepted save request.
it('should validate gitlab subproject url', function() {
  const gitlabSubProjectUrl = 'https://gitlab.com/user/project/sub/';
  const originSaveUrl = this.Urls.origin_save_request('git', gitlabSubProjectUrl);

  // the option is named `originUrl`: a misspelled key is silently ignored
  // by stubSaveRequest and leaves `origin_url` undefined in the stub
  stubSaveRequest({requestUrl: originSaveUrl,
                   saveRequestStatus: 'accepted',
                   originUrl: gitlabSubProjectUrl,
                   saveTaskStatus: 'not yet scheduled'});

  makeOriginSaveRequest('git', gitlabSubProjectUrl);

  cy.wait('@saveRequest').then(() => {
    checkAlertVisible('success', saveCodeMsg['success']);
  });
});
// Scenarios where the stubbed save request endpoint answers with a given
// outcome and the page must display the matching alert. `errorMsg` and
// `expectedMsg` are keys of saveCodeMsg.
const alertScenarios = [
  {
    title: 'should display warning message when pending',
    stubOptions: {saveRequestStatus: 'pending'},
    alertType: 'warning',
    expectedMsg: 'warning'
  },
  {
    title: 'should show error when csrf validation failed (status: 403)',
    stubOptions: {saveRequestStatus: 'rejected', responseStatus: 403},
    errorMsg: 'csrfError',
    alertType: 'danger',
    expectedMsg: 'csrfError'
  },
  {
    title: 'should show error when origin is rejected (status: 403)',
    stubOptions: {saveRequestStatus: 'rejected', responseStatus: 403},
    errorMsg: 'rejected',
    alertType: 'danger',
    expectedMsg: 'rejected'
  },
  {
    title: 'should show error when rate limited (status: 429)',
    stubOptions: {
      saveRequestStatus: 'Request was throttled. Expected available in 60 seconds.',
      responseStatus: 429
    },
    alertType: 'danger',
    expectedMsg: 'rateLimit'
  },
  {
    title: 'should show error when unknown error occurs (status other than 200, 403, 429)',
    stubOptions: {saveRequestStatus: 'Error', responseStatus: 406},
    alertType: 'danger',
    expectedMsg: 'unknownError'
  }
];

for (const scenario of alertScenarios) {
  it(scenario.title, function() {
    const stubOptions = Object.assign(
      {
        requestUrl: this.originSaveUrl,
        originUrl: origin.url,
        saveTaskStatus: 'not created'
      },
      scenario.stubOptions
    );
    if (scenario.errorMsg) {
      stubOptions.errorMessage = saveCodeMsg[scenario.errorMsg];
    }
    stubSaveRequest(stubOptions);

    makeOriginSaveRequest(origin.type, origin.url);

    cy.wait('@saveRequest').then(() => {
      checkAlertVisible(scenario.alertType, saveCodeMsg[scenario.expectedMsg]);
    });
  });
}
// Check that each row of the requests table renders the entry of the
// origin-save fixture found at the same index.
it('should display origin save info in the requests table', function() {
cy.route('GET', '/save/requests/list/**', '@originSaveJSON');
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('tbody tr').then(rows => {
// index of the fixture entry matching the current row
let i = 0;
for (let row of rows) {
const cells = row.cells;
// the request date is displayed in the locale format of the browser
const requestDateStr = new Date(this.originSaveJSON.data[i].save_request_date).toLocaleString();
const saveStatus = this.originSaveJSON.data[i].save_task_status;
assert.equal($(cells[0]).text(), requestDateStr);
assert.equal($(cells[1]).text(), this.originSaveJSON.data[i].visit_type);
// expected markup of the origin url cell
// NOTE(review): the HTML string literals below look truncated (markup
// apparently lost when this diff was extracted) - confirm against the
// original spec file
let html = '';
- if (saveStatus === 'succeed') {
+ if (saveStatus === 'succeeded') {
let browseOriginUrl = `${this.Urls.browse_origin()}?origin_url=${encodeURIComponent(this.originSaveJSON.data[i].origin_url)}`;
browseOriginUrl += `×tamp=${encodeURIComponent(this.originSaveJSON.data[i].visit_date)}`;
html += `${this.originSaveJSON.data[i].origin_url}`;
} else {
html += this.originSaveJSON.data[i].origin_url;
}
html += ` `;
html += '';
assert.equal($(cells[2]).html(), html);
assert.equal($(cells[3]).text(), this.originSaveJSON.data[i].save_request_status);
assert.equal($(cells[4]).text(), saveStatus);
++i;
}
});
});
// A save request whose task succeeded but that has no visit date must be
// rendered with a browse url holding only the origin_url query parameter.
it('should not add timestamp to the browse origin URL is no visit date has been found', function() {
const originUrl = 'https://git.example.org/example.git';
const saveRequestData = genOriginSaveResponse({
saveRequestStatus: 'accepted',
originUrl: originUrl,
- saveTaskStatus: 'succeed',
+ saveTaskStatus: 'succeeded',
visitDate: null
});
// requests list response holding that single save request
const saveRequestsListData = {
'recordsTotal': 1,
'draw': 2,
'recordsFiltered': 1,
'data': [saveRequestData]
};
cy.route('GET', '/save/requests/list/**', saveRequestsListData);
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('tbody tr').then(rows => {
const firstRowCells = rows[0].cells;
const browseOriginUrl = `${this.Urls.browse_origin()}?origin_url=${encodeURIComponent(originUrl)}`;
// NOTE(review): this literal looks truncated (link markup using
// browseOriginUrl apparently lost in extraction) - confirm against the
// original spec file
const browseOriginLink = `${originUrl}`;
expect($(firstRowCells[2]).html()).to.have.string(browseOriginLink);
});
});
// Clicking twice on the info button of a request toggles its popover.
it('should display/close task info popover when clicking on the info button', function() {
  const infoButton = '.swh-save-request-info';
  const popover = '.swh-save-request-info-popover';

  cy.route('GET', '/save/requests/list/**', '@originSaveJSON');
  cy.route('GET', '/save/task/info/**', '@saveTaskInfoJSON');
  cy.get('#swh-origin-save-requests-list-tab').click();

  // first click opens the popover ...
  cy.get(infoButton).eq(0).click();
  cy.get(popover).should('be.visible');

  // ... and a second click on the same button closes it
  cy.get(infoButton).eq(0).click();
  cy.get(popover).should('not.be.visible');
});

// The close button of the popover hides it.
it('should hide task info popover when clicking on the close button', function() {
  const popover = '.swh-save-request-info-popover';

  cy.route('GET', '/save/requests/list/**', '@originSaveJSON');
  cy.route('GET', '/save/task/info/**', '@saveTaskInfoJSON');
  cy.get('#swh-origin-save-requests-list-tab').click();

  cy.get('.swh-save-request-info').eq(0).click();
  cy.get(popover).should('be.visible');

  cy.get('.swh-save-request-info-close').click();
  cy.get(popover).should('not.be.visible');
});
// Clicking on the "Save again" button of the first listed request must
// copy its visit type and origin url into the save request form.
it('should fill save request form when clicking on "Save again" button', function() {
cy.route('GET', '/save/requests/list/**', '@originSaveJSON');
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('.swh-save-origin-again')
.eq(0)
.click();
cy.get('tbody tr').eq(0).then(row => {
// cells of the first table row: [1] is the visit type, [2] the origin url
const cells = row[0].cells;
cy.get('#swh-input-visit-type')
.should('have.value', $(cells[1]).text());
// the last character of the cell text is dropped before comparing
// NOTE(review): presumably a trailing separator rendered after the
// url - confirm against the cell markup
cy.get('#swh-input-origin-url')
.should('have.value', $(cells[2]).text().slice(0, -1));
});
});
// A request submitted with a visit type that does not match the origin url
// must get the visit type guessed from the url once "Save again" is clicked.
it('should select correct visit type if possible when clicking on "Save again" button', function() {
  const originUrl = 'https://gitlab.inria.fr/solverstack/maphys/maphys/';
  const badVisitType = 'hg';
  const goodVisitType = 'git';

  cy.route('GET', '/save/requests/list/**', '@originSaveJSON');

  // successful (status 200) stub: no error message is needed, and
  // saveCodeMsg has no 'accepted' entry to provide one anyway
  stubSaveRequest({requestUrl: this.Urls.origin_save_request(badVisitType, originUrl),
                   visitType: badVisitType,
                   saveRequestStatus: 'accepted',
                   originUrl: originUrl,
                   saveTaskStatus: 'failed'});

  makeOriginSaveRequest(badVisitType, originUrl);

  cy.get('#swh-origin-save-requests-list-tab').click();

  cy.wait('@saveRequest').then(() => {
    cy.get('.swh-save-origin-again')
      .eq(0)
      .click();

    cy.get('tbody tr').eq(0).then(row => {
      const cells = row[0].cells;
      cy.get('#swh-input-visit-type')
        .should('have.value', goodVisitType);
      // the last character of the cell text is dropped before comparing
      cy.get('#swh-input-origin-url')
        .should('have.value', $(cells[2]).text().slice(0, -1));
    });
  });
});
});
diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py
index a6b64c11..65896318 100644
--- a/swh/web/api/views/origin_save.py
+++ b/swh/web/api/views/origin_save.py
@@ -1,89 +1,89 @@
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.views.decorators.cache import never_cache
from swh.web.api.apidoc import api_doc, format_docstring
from swh.web.api.apiurls import api_route
from swh.web.common.origin_save import (
create_save_origin_request,
get_save_origin_requests,
)
# NOTE(review): the named groups of the route pattern below look truncated
# (presumably named after the visit_type and origin_url view parameters,
# lost when this diff was extracted) - confirm against the original file.
@api_route(
r"/origin/save/(?P.+)/url/(?P.+)/",
"api-1-save-origin",
methods=["GET", "POST"],
throttle_scope="swh_save_origin",
)
@never_cache
@api_doc("/origin/save/")
@format_docstring()
def api_save_origin(request, visit_type, origin_url):
"""
.. http:get:: /api/1/origin/save/(visit_type)/url/(origin_url)/
.. http:post:: /api/1/origin/save/(visit_type)/url/(origin_url)/
Request the saving of a software origin into the archive
or check the status of previously created save requests.
That endpoint enables to create a saving task for a software origin
through a POST request.
Depending of the provided origin url, the save request can either be:
* immediately **accepted**, for well known code hosting providers
like for instance GitHub or GitLab
* **rejected**, in case the url is blacklisted by Software Heritage
* **put in pending state** until a manual check is done in order to
determine if it can be loaded or not
Once a saving request has been accepted, its associated saving task
status can then be checked through a GET request on the same url.
Returned status can either be:
* **not created**: no saving task has been created
* **not yet scheduled**: saving task has been created but its
execution has not yet been scheduled
* **scheduled**: the task execution has been scheduled
- * **succeed**: the saving task has been successfully executed
+ * **succeeded**: the saving task has been successfully executed
* **failed**: the saving task has been executed but it failed
When issuing a POST request an object will be returned while a GET
request will return an array of objects (as multiple save requests
might have been submitted for the same origin).
:param string visit_type: the type of visit to perform
(currently the supported types are ``git``, ``hg`` and ``svn``)
:param string origin_url: the url of the origin to save
{common_headers}
:>json string origin_url: the url of the origin to save
:>json string visit_type: the type of visit to perform
:>json string save_request_date: the date (in iso format) the save
request was issued
:>json string save_request_status: the status of the save request,
either **accepted**, **rejected** or **pending**
:>json string save_task_status: the status of the origin saving task,
either **not created**, **not yet scheduled**, **scheduled**,
- **succeed** or **failed**
+ **succeeded** or **failed**
:statuscode 200: no error
:statuscode 400: an invalid visit type or origin url has been provided
:statuscode 403: the provided origin url is blacklisted
:statuscode 404: no save requests have been found for a given origin
"""
if request.method == "POST":
# POST: create a new save request; a single object is returned
sor = create_save_origin_request(visit_type, origin_url)
# the "id" entry is removed from the returned data
del sor["id"]
else:
# GET: list the save requests already submitted for that origin
sor = get_save_origin_requests(visit_type, origin_url)
for s in sor:
del s["id"]
return sor
diff --git a/swh/web/assets/src/bundles/save/index.js b/swh/web/assets/src/bundles/save/index.js
index a50c5c82..0023ee7a 100644
--- a/swh/web/assets/src/bundles/save/index.js
+++ b/swh/web/assets/src/bundles/save/index.js
@@ -1,471 +1,471 @@
/**
* Copyright (C) 2018-2020 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
import {handleFetchError, csrfPost, isGitRepoUrl, htmlAlert, removeUrlFragment} from 'utils/functions';
import {swhSpinnerSrc} from 'utils/constants';
import {validate} from 'validate.js';
let saveRequestsTable;
/**
 * Submit a save request for an origin and report its outcome.
 *
 * The processing indicator is displayed while the request is in flight
 * and hidden as soon as it settles.
 *
 * @param {string} originType - visit type of the origin to save
 * @param {string} originUrl - url of the origin to save
 * @param {Function} acceptedCallback - called without argument when the
 *   returned `save_request_status` is 'accepted'
 * @param {Function} pendingCallback - called without argument for any
 *   other returned status
 * @param {Function} errorCallback - called with `(statusCode, errorData)`
 *   when the request fails. `errorData` is the JSON body of the error
 *   response, or `{}` when that body is not valid JSON. When the failure is
 *   not an HTTP response (e.g. network error, exception thrown while
 *   handling the response), it is called with `(undefined, {})`.
 */
function originSaveRequest(originType, originUrl,
                           acceptedCallback, pendingCallback, errorCallback) {
  const addSaveOriginRequestUrl = Urls.origin_save_request(originType, originUrl);
  const headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json'
  };
  const hideProcessingIndicator = () => {
    $('.swh-processing-save-request').css('display', 'none');
  };

  $('.swh-processing-save-request').css('display', 'block');
  csrfPost(addSaveOriginRequestUrl, headers)
    .then(handleFetchError)
    .then(response => response.json())
    .then(data => {
      hideProcessingIndicator();
      if (data.save_request_status === 'accepted') {
        acceptedCallback();
      } else {
        pendingCallback();
      }
    })
    .catch(response => {
      hideProcessingIndicator();
      // the rejection reason is not necessarily a fetch Response: network
      // failures reject with an error object that has no json() method
      if (!response || typeof response.json !== 'function') {
        errorCallback(undefined, {});
        return;
      }
      response.json()
        // an error body that is not JSON must not hide the failure
        .catch(() => ({}))
        .then(errorData => {
          errorCallback(response.status, errorData);
        });
    });
}
/**
 * Initialize the "save code now" page once the document is ready: visit
 * types select, save requests table, tabs, save request form and origin
 * url input.
 *
 * NOTE(review): several template literals of this function look truncated
 * (HTML markup apparently lost when this diff was extracted) - confirm
 * against the original file.
 */
export function initOriginSave() {
$(document).ready(() => {
// DataTables errors are handled by the 'error.dt' handler below
$.fn.dataTable.ext.errMode = 'none';
// fill the visit types select with the types returned by the backend
fetch(Urls.origin_save_types_list())
.then(response => response.json())
.then(data => {
for (let originType of data) {
$('#swh-input-visit-type').append(``);
}
});
saveRequestsTable = $('#swh-origin-save-requests')
.on('error.dt', (e, settings, techNote, message) => {
$('#swh-origin-save-request-list-error').text('An error occurred while retrieving the save requests list');
console.log(message);
})
.DataTable({
serverSide: true,
processing: true,
language: {
processing: ``
},
ajax: Urls.origin_save_requests_list('all'),
searchDelay: 1000,
columns: [
{
data: 'save_request_date',
name: 'request_date',
render: (data, type, row) => {
// display the date in the locale format, keep raw data otherwise
if (type === 'display') {
let date = new Date(data);
return date.toLocaleString();
}
return data;
}
},
{
data: 'visit_type',
name: 'visit_type'
},
{
data: 'origin_url',
name: 'origin_url',
render: (data, type, row) => {
if (type === 'display') {
let html = '';
// escape the url before inserting it in the cell markup
const sanitizedURL = $.fn.dataTable.render.text().display(data);
// a browse url is built only when the saving task succeeded
- if (row.save_task_status === 'succeed') {
+ if (row.save_task_status === 'succeeded') {
let browseOriginUrl = `${Urls.browse_origin()}?origin_url=${encodeURIComponent(sanitizedURL)}`;
// the visit date is added to the browse url only when known
if (row.visit_date) {
browseOriginUrl += `×tamp=${encodeURIComponent(row.visit_date)}`;
}
html += `${sanitizedURL}`;
} else {
html += sanitizedURL;
}
html += ` `;
return html;
}
return data;
}
},
{
data: 'save_request_status',
name: 'status'
},
{
data: 'save_task_status',
name: 'loading_task_status'
},
{
name: 'info',
render: (data, type, row) => {
// info column: filled only for tasks that succeeded or failed
- if (row.save_task_status === 'succeed' || row.save_task_status === 'failed') {
+ if (row.save_task_status === 'succeeded' || row.save_task_status === 'failed') {
return ``;
} else {
return '';
}
}
},
{
render: (data, type, row) => {
// last column: "save again" button, for accepted requests only
if (row.save_request_status === 'accepted') {
const saveAgainButton =
'';
return saveAgainButton;
} else {
return '';
}
}
}
],
scrollY: '50vh',
scrollCollapse: true,
// sort on the first column (request date), descending
order: [[0, 'desc']],
responsive: {
details: {
type: 'none'
}
}
});
swh.webapp.addJumpToPagePopoverToDataTable(saveRequestsTable);
// redraw the table and set the '#requests' fragment when its tab is shown
$('#swh-origin-save-requests-list-tab').on('shown.bs.tab', () => {
saveRequestsTable.draw();
window.location.hash = '#requests';
});
// showing the help tab removes the fragment and disposes info popovers
$('#swh-origin-save-request-help-tab').on('shown.bs.tab', () => {
removeUrlFragment();
$('.swh-save-request-info').popover('dispose');
});
// alerts displayed according to the outcome of a save request
let saveRequestAcceptedAlert = htmlAlert(
'success',
'The "save code now" request has been accepted and will be processed as soon as possible.',
true
);
let saveRequestPendingAlert = htmlAlert(
'warning',
'The "save code now" request has been put in pending state and may be accepted for processing after manual review.',
true
);
let saveRequestRateLimitedAlert = htmlAlert(
'danger',
'The rate limit for "save code now" requests has been reached. Please try again later.',
true
);
let saveRequestUnknownErrorAlert = htmlAlert(
'danger',
'An unexpected error happened when submitting the "save code now request".',
true
);
$('#swh-save-origin-form').submit(event => {
// the form is submitted through originSaveRequest, not by the browser
event.preventDefault();
event.stopPropagation();
$('.alert').alert('close');
if (event.target.checkValidity()) {
$(event.target).removeClass('was-validated');
let originType = $('#swh-input-visit-type').val();
let originUrl = $('#swh-input-origin-url').val();
originSaveRequest(originType, originUrl,
() => $('#swh-origin-save-request-status').html(saveRequestAcceptedAlert),
() => $('#swh-origin-save-request-status').html(saveRequestPendingAlert),
(statusCode, errorData) => {
$('#swh-origin-save-request-status').css('color', 'red');
if (statusCode === 403) {
// NOTE(review): unlike the other alerts, no third argument is
// passed to htmlAlert here - confirm this is intended
const errorAlert = htmlAlert('danger', `Error: ${errorData['detail']}`);
$('#swh-origin-save-request-status').html(errorAlert);
} else if (statusCode === 429) {
$('#swh-origin-save-request-status').html(saveRequestRateLimitedAlert);
} else {
$('#swh-origin-save-request-status').html(saveRequestUnknownErrorAlert);
}
});
} else {
$(event.target).addClass('was-validated');
}
});
$('#swh-show-origin-save-requests-list').on('click', (event) => {
event.preventDefault();
$('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show');
});
$('#swh-input-origin-url').on('input', function(event) {
// trim the typed url, then select every visit type whose name is
// contained in it
let originUrl = $(this).val().trim();
$(this).val(originUrl);
$('#swh-input-visit-type option').each(function() {
let val = $(this).val();
if (val && originUrl.includes(val)) {
$(this).prop('selected', true);
}
});
});
// directly show the requests list when the page url targets it
if (window.location.hash === '#requests') {
$('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show');
}
});
}
/**
 * Validate the origin url typed in a form input and set the custom
 * validity of that input accordingly.
 *
 * The url must use one of the http, https, svn or git schemes. When the
 * selected visit type is 'git' and the url targets GitHub, a GitLab
 * instance, SourceForge git or Bitbucket, it must moreover be recognized
 * as a repository url by isGitRepoUrl.
 *
 * @param {HTMLInputElement} input - the origin url input to validate
 */
export function validateSaveOriginUrl(input) {
  const candidateUrl = input.value.trim();
  const urlConstraints = {
    website: {
      url: {
        schemes: ['http', 'https', 'svn', 'git']
      }
    }
  };
  let isValid = validate({website: candidateUrl}, urlConstraints) === undefined;
  const visitType = $('#swh-input-visit-type').val();

  // index of a '://host' marker, or -1 when the marker is missing or is not
  // located within the first characters of the url (i.e. after the scheme)
  const hostMarkerIndex = (marker) => {
    const position = candidateUrl.indexOf(marker);
    return position <= 5 ? position : -1;
  };

  if (visitType === 'git' && isValid) {
    // additional checks for well known code hosting providers
    const gitlabIdx = hostMarkerIndex('://gitlab.');
    if (hostMarkerIndex('://github.com') !== -1) {
      isValid = isGitRepoUrl(candidateUrl, 'github.com');
    } else if (gitlabIdx !== -1) {
      // the domain of the GitLab instance ends at the first '/' after '://'
      const domainStart = gitlabIdx + 3;
      const domainEnd = candidateUrl.indexOf('/', domainStart);
      isValid = domainEnd !== -1 &&
        isGitRepoUrl(candidateUrl, candidateUrl.slice(domainStart, domainEnd));
    } else if (hostMarkerIndex('://git.code.sf.net') !== -1) {
      isValid = isGitRepoUrl(candidateUrl, 'git.code.sf.net/p');
    } else if (hostMarkerIndex('://bitbucket.org') !== -1) {
      isValid = isGitRepoUrl(candidateUrl, 'bitbucket.org');
    }
  }

  const validityMessage = isValid
    ? ''
    : 'The origin url is not valid or does not reference a code repository';
  input.setCustomValidity(validityMessage);
}
/**
 * Set up the "take new snapshot" form: once the document is ready, its
 * submission sends a save request for the origin described by the visit
 * type and origin url inputs, then displays an alert in the request status
 * area according to the outcome.
 */
export function initTakeNewSnapshot() {
  const statusSelector = '#swh-take-new-snapshot-request-status';

  // alerts are created up front, in the same order as they are declared
  const alerts = {
    accepted: htmlAlert(
      'success',
      'The "take new snapshot" request has been accepted and will be processed as soon as possible.',
      true
    ),
    pending: htmlAlert(
      'warning',
      'The "take new snapshot" request has been put in pending state and may be accepted for processing after manual review.',
      true
    ),
    rateLimited: htmlAlert(
      'danger',
      'The rate limit for "take new snapshot" requests has been reached. Please try again later.',
      true
    ),
    unknownError: htmlAlert(
      'danger',
      'An unexpected error happened when submitting the "save code now request".',
      true
    )
  };

  const showAlert = (alert) => $(statusSelector).html(alert);

  const onRequestError = (statusCode, errorData) => {
    $(statusSelector).css('color', 'red');
    switch (statusCode) {
      case 403:
        // forbidden: display the reason returned by the backend
        showAlert(htmlAlert('danger', `Error: ${errorData['detail']}`, true));
        break;
      case 429:
        showAlert(alerts.rateLimited);
        break;
      default:
        showAlert(alerts.unknownError);
    }
  };

  $(document).ready(() => {
    $('#swh-take-new-snapshot-form').submit(event => {
      event.preventDefault();
      event.stopPropagation();

      const originType = $('#swh-input-visit-type').val();
      const originUrl = $('#swh-input-origin-url').val();

      originSaveRequest(originType, originUrl,
                        () => showAlert(alerts.accepted),
                        () => showAlert(alerts.pending),
                        onRequestError);
    });
  });
}
/**
 * Toggle the popover showing the task information of a save request.
 *
 * If the clicked element already has a popover, it is disposed and nothing
 * else happens. Otherwise the popovers of the other save requests are
 * disposed and a new one is created on the clicked element.
 *
 * NOTE(review): the body of this function looks truncated between the
 * popover options and the rendering loop (markup and the code retrieving
 * the task information apparently lost when this diff was extracted) -
 * confirm against the original file.
 */
export function displaySaveRequestInfo(event, saveRequestId) {
event.stopPropagation();
const saveRequestTaskInfoUrl = Urls.origin_save_task_info(saveRequestId);
// close popover when clicking again on the info icon
if ($(event.target).data('bs.popover')) {
$(event.target).popover('dispose');
return;
}
// only one info popover is displayed at a time
$('.swh-save-request-info').popover('dispose');
$(event.target).popover({
animation: false,
boundary: 'viewport',
container: 'body',
title: 'Save request task information ' +
'`,
content: `
';
for (let info of saveRequestInfo) {
content +=
`
${info.key}
${info.value}
`;
}
content += '
';
}
// replace the popover content, then update the popover
$('.swh-popover').html(content);
$(event.target).popover('update');
});
}
/**
 * Fill the save request form with the data of an existing request, then
 * scroll back to the top of the page.
 *
 * The visit type is guessed from the origin url: every option whose value
 * is contained in the url gets selected. The provided visit type is only
 * used as a fallback, when no option could be guessed from the url.
 *
 * @param {string} visitType - visit type of the existing save request
 * @param {string} originUrl - origin url of the existing save request
 */
export function fillSaveRequestFormAndScroll(visitType, originUrl) {
  $('#swh-input-origin-url').val(originUrl);

  // select the visit type options whose value satisfies the predicate and
  // tell whether at least one of them did
  const selectOptionsWhere = (predicate) => {
    let anySelected = false;
    $('#swh-input-visit-type option').each((index, option) => {
      if (predicate($(option).val())) {
        $(option).prop('selected', true);
        anySelected = true;
      }
    });
    return anySelected;
  };

  const guessedFromUrl = selectOptionsWhere(value => value && originUrl.includes(value));
  if (!guessedFromUrl) {
    selectOptionsWhere(value => value === visitType);
  }

  window.scrollTo(0, 0);
}
diff --git a/swh/web/common/migrations/0007_save_request_task_status_fix_typo.py b/swh/web/common/migrations/0007_save_request_task_status_fix_typo.py
new file mode 100644
index 00000000..a2589c17
--- /dev/null
+++ b/swh/web/common/migrations/0007_save_request_task_status_fix_typo.py
@@ -0,0 +1,43 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from django.db import migrations, models
+
+
+def _rename_request_status_from_succeed_to_succeeded(apps, schema_editor):
+ """
+ Fix a typo in save request status value.
+ """
+ SaveOriginRequest = apps.get_model("swh.web.common", "SaveOriginRequest")
+ for sor in SaveOriginRequest.objects.all():
+ if sor.loading_task_status == "succeed":
+ sor.loading_task_status = "succeeded"
+ sor.save()
+
+
+class Migration(migrations.Migration):
+ # Rename the "succeed" loading task status to "succeeded": the choices
+ # of the field are updated, then the existing rows are converted.
+
+ dependencies = [
+ ("swh.web.common", "0006_rename_origin_type"),
+ ]
+
+ operations = [
+ # declare "succeeded" instead of "succeed" in the field choices
+ migrations.AlterField(
+ model_name="saveoriginrequest",
+ name="loading_task_status",
+ field=models.TextField(
+ choices=[
+ ("not created", "not created"),
+ ("not yet scheduled", "not yet scheduled"),
+ ("scheduled", "scheduled"),
+ ("succeeded", "succeeded"),
+ ("failed", "failed"),
+ ("running", "running"),
+ ],
+ default="not created",
+ ),
+ ),
+ # convert the values already stored; no reverse function is provided
+ migrations.RunPython(_rename_request_status_from_succeed_to_succeeded),
+ ]
diff --git a/swh/web/common/models.py b/swh/web/common/models.py
index b0e54cd6..f2d21a80 100644
--- a/swh/web/common/models.py
+++ b/swh/web/common/models.py
@@ -1,99 +1,99 @@
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.db import models
class SaveAuthorizedOrigin(models.Model):
"""
Model table holding origin urls authorized to be loaded into the archive.
"""
# authorized origin url (at most 200 characters, NULL not allowed)
url = models.CharField(max_length=200, null=False)
class Meta:
app_label = "swh.web.common"
db_table = "save_authorized_origin"
def __str__(self):
# an entry is represented by its url
return self.url
class SaveUnauthorizedOrigin(models.Model):
"""
Model table holding origin urls not authorized to be loaded into the
archive.
"""
# unauthorized origin url (at most 200 characters, NULL not allowed)
url = models.CharField(max_length=200, null=False)
class Meta:
app_label = "swh.web.common"
db_table = "save_unauthorized_origin"
def __str__(self):
# an entry is represented by its url
return self.url
# Possible statuses of a save request
SAVE_REQUEST_ACCEPTED = "accepted"
SAVE_REQUEST_REJECTED = "rejected"
SAVE_REQUEST_PENDING = "pending"
# (value, label) pairs usable as choices of a model field
SAVE_REQUEST_STATUS = [
(SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_ACCEPTED),
(SAVE_REQUEST_REJECTED, SAVE_REQUEST_REJECTED),
(SAVE_REQUEST_PENDING, SAVE_REQUEST_PENDING),
]
# Possible statuses of the loading task associated to a save request
SAVE_TASK_NOT_CREATED = "not created"
SAVE_TASK_NOT_YET_SCHEDULED = "not yet scheduled"
SAVE_TASK_SCHEDULED = "scheduled"
-SAVE_TASK_SUCCEED = "succeed"
+SAVE_TASK_SUCCEEDED = "succeeded"
SAVE_TASK_FAILED = "failed"
SAVE_TASK_RUNNING = "running"
# (value, label) pairs usable as choices of a model field
SAVE_TASK_STATUS = [
(SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_CREATED),
(SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED),
(SAVE_TASK_SCHEDULED, SAVE_TASK_SCHEDULED),
- (SAVE_TASK_SUCCEED, SAVE_TASK_SUCCEED),
+ (SAVE_TASK_SUCCEEDED, SAVE_TASK_SUCCEEDED),
(SAVE_TASK_FAILED, SAVE_TASK_FAILED),
(SAVE_TASK_RUNNING, SAVE_TASK_RUNNING),
]
class SaveOriginRequest(models.Model):
"""
Model table holding all the save origin requests issued by users.
"""
id = models.BigAutoField(primary_key=True)
# set automatically when the request is created
request_date = models.DateTimeField(auto_now_add=True)
visit_type = models.CharField(max_length=200, null=False)
origin_url = models.CharField(max_length=200, null=False)
# one of SAVE_REQUEST_STATUS, "pending" by default
status = models.TextField(choices=SAVE_REQUEST_STATUS, default=SAVE_REQUEST_PENDING)
# NOTE(review): the -1 default presumably means that no loading task
# has been created yet - confirm
loading_task_id = models.IntegerField(default=-1)
# may be NULL
visit_date = models.DateTimeField(null=True)
# one of SAVE_TASK_STATUS, "not created" by default
loading_task_status = models.TextField(
choices=SAVE_TASK_STATUS, default=SAVE_TASK_NOT_CREATED
)
class Meta:
app_label = "swh.web.common"
db_table = "save_origin_request"
# default ordering: highest id first
ordering = ["-id"]
def __str__(self):
# string form of a dict of the request fields (loading_task_status is
# not part of it)
return str(
{
"id": self.id,
"request_date": self.request_date,
"visit_type": self.visit_type,
"origin_url": self.origin_url,
"status": self.status,
"loading_task_id": self.loading_task_id,
"visit_date": self.visit_date,
}
)
diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py
index 510f1797..0e2822ec 100644
--- a/swh/web/common/origin_save.py
+++ b/swh/web/common/origin_save.py
@@ -1,617 +1,623 @@
# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from bisect import bisect_right
from datetime import datetime, timedelta, timezone
from itertools import product
import json
import logging
from typing import Any, Dict
from prometheus_client import Gauge
import requests
import sentry_sdk
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.core.validators import URLValidator
from django.utils.html import escape
from swh.scheduler.utils import create_oneshot_task_dict
from swh.web import config
from swh.web.common import service
from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc
from swh.web.common.models import (
SAVE_REQUEST_ACCEPTED,
SAVE_REQUEST_PENDING,
SAVE_REQUEST_REJECTED,
SAVE_TASK_FAILED,
SAVE_TASK_NOT_CREATED,
SAVE_TASK_NOT_YET_SCHEDULED,
SAVE_TASK_RUNNING,
SAVE_TASK_SCHEDULED,
- SAVE_TASK_SUCCEED,
+ SAVE_TASK_SUCCEEDED,
SaveAuthorizedOrigin,
SaveOriginRequest,
SaveUnauthorizedOrigin,
)
from swh.web.common.origin_visits import get_origin_visits
from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc
scheduler = config.scheduler()
logger = logging.getLogger(__name__)
def get_origin_save_authorized_urls():
    """
    Retrieve the whitelist of origin url prefixes, i.e. the prefixes of the
    origins authorized to be immediately loaded into the archive.

    Returns:
        list: the url of every authorized origin entry
    """
    authorized_origins = SaveAuthorizedOrigin.objects.all()
    return [authorized.url for authorized in authorized_origins]
def get_origin_save_unauthorized_urls():
    """
    Retrieve the blacklist of origin url prefixes, i.e. the prefixes of the
    origins forbidden to be loaded into the archive.

    Returns:
        list: the url of every unauthorized origin entry
    """
    unauthorized_origins = SaveUnauthorizedOrigin.objects.all()
    return [unauthorized.url for unauthorized in unauthorized_origins]
def can_save_origin(origin_url):
    """
    Tell how a save request for a software origin must be handled.

    The decision is taken by matching the origin url against the url
    prefixes of the blacklist first, then against those of the whitelist:

        * a blacklisted url gets its request rejected
        * a whitelisted url gets its request immediately accepted
        * any other url gets its request put in pending state until
          a manual review

    Args:
        origin_url (str): the software origin url to check

    Returns:
        str: the origin save request status, either **accepted**,
        **rejected** or **pending**
    """

    def _matches_any_prefix(url_prefixes):
        return any(origin_url.startswith(prefix) for prefix in url_prefixes)

    # the blacklist takes precedence over the whitelist
    if _matches_any_prefix(get_origin_save_unauthorized_urls()):
        return SAVE_REQUEST_REJECTED

    if _matches_any_prefix(get_origin_save_authorized_urls()):
        return SAVE_REQUEST_ACCEPTED

    # neither blacklisted nor whitelisted: manual verification is needed
    return SAVE_REQUEST_PENDING
# map visit type to scheduler task
# TODO: do not hardcode the task name here (T1157)
# (the keys of that map are the visit types that can be saved)
_visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"}
# map scheduler task status to origin save status
_save_task_status = {
"next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED,
"next_run_scheduled": SAVE_TASK_SCHEDULED,
- "completed": SAVE_TASK_SUCCEED,
+ "completed": SAVE_TASK_SUCCEEDED,
"disabled": SAVE_TASK_FAILED,
}
def get_savable_visit_types():
    """
    Return the visit types that can be requested through a save request.

    Returns:
        list: the sorted keys of the visit type to scheduler task mapping
    """
    return sorted(_visit_type_task)
def _check_visit_type_savable(visit_type):
    """
    Ensure a visit type can be requested through a save request.

    Nothing is returned: the function only raises when the check fails.

    Args:
        visit_type (str): the visit type to check

    Raises:
        BadInputExc: no scheduler task is associated to the visit type
    """
    if visit_type in _visit_type_task:
        return

    # only build the list of allowed types when it is needed for the error
    allowed_visit_types = ", ".join(get_savable_visit_types())
    raise BadInputExc(
        "Visit of type %s can not be saved! "
        "Allowed types are the following: %s" % (visit_type, allowed_visit_types)
    )
# validator restricted to the url schemes accepted for origin urls
_validate_url = URLValidator(schemes=["http", "https", "svn", "git"])


def _check_origin_url_valid(origin_url):
    """
    Ensure an origin url is well formed and uses an accepted scheme.

    Args:
        origin_url (str): the origin url to check

    Raises:
        BadInputExc: the url has been rejected by the validator
    """
    try:
        _validate_url(origin_url)
    except ValidationError:
        # the url is escaped before being embedded in the error message
        error_msg = "The provided origin url (%s) is not valid!" % escape(origin_url)
        raise BadInputExc(error_msg)
def _get_visit_info_for_save_request(save_request):
    """
    Look for the origin visit associated to a save request.

    The visit picked is the first one whose date is strictly greater than
    the save request date (the visit dates are assumed to be sorted in
    ascending order, as required by the bisection).

    Args:
        save_request: the save request to find a visit for, its
            ``request_date`` and ``origin_url`` attributes are read

    Returns:
        tuple: a ``(visit_date, visit_status)`` pair; the date is ``None``
        when the visit status is neither ``full`` nor ``partial``, and both
        values are ``None`` when no visit was found, when the lookup failed
        or when the request is older than 30 days
    """
    visit_date = None
    visit_status = None
    elapsed = datetime.now(tz=timezone.utc) - save_request.request_date
    # stop trying to find a visit date one month after save request submission
    # as those requests to storage are expensive and associated loading task
    # surely ended up with errors
    if elapsed.days > 30:
        return visit_date, visit_status

    try:
        origin_info = service.lookup_origin({"url": save_request.origin_url})
        visits = get_origin_visits(origin_info)
        dates = [parse_iso8601_date_to_utc(visit["date"]) for visit in visits]
        idx = bisect_right(dates, save_request.request_date)
        if idx < len(dates):
            visit_date = dates[idx]
            visit_status = visits[idx]["status"]
            # only terminated visits provide a visit date
            if visit_status not in ("full", "partial"):
                visit_date = None
    except Exception as exc:
        # best effort lookup: errors are reported to sentry, not raised
        sentry_sdk.capture_exception(exc)
    return visit_date, visit_status
def _check_visit_update_status(save_request, save_task_status):
    """
    Refresh the visit date of a save request and derive its task status.

    The ``visit_date`` attribute of the save request is updated in memory
    only, saving it to the database is left to the caller.

    Args:
        save_request: the save request to update
        save_task_status (str): the current save task status, returned
            unchanged when no better status can be derived

    Returns:
        tuple: a ``(visit_date, save_task_status)`` pair
    """
    visit_date, visit_status = _get_visit_info_for_save_request(save_request)
    save_request.visit_date = visit_date

    # visit has been performed, mark the saving task as succeeded
    if visit_date and visit_status is not None:
        return visit_date, SAVE_TASK_SUCCEEDED

    if visit_status == "ongoing":
        return visit_date, SAVE_TASK_RUNNING

    # consider the task as failed if it is still in scheduled state
    # 30 days after its submission
    elapsed = datetime.now(tz=timezone.utc) - save_request.request_date
    if elapsed.days > 30:
        save_task_status = SAVE_TASK_FAILED
    return visit_date, save_task_status
def _save_request_dict(save_request, task=None):
    """
    Serialize a save request to a dict, refreshing its status on the way.

    The save task status is derived from the scheduler task when it is
    provided, from the status stored in the save request otherwise. The
    save request is written back to the database when its visit date has
    been looked up for a terminated task or when its task status changed.

    Args:
        save_request: the save request to serialize
        task (dict): the associated scheduler task, if still available

    Returns:
        dict: the save request info with the keys **id**, **visit_type**,
        **origin_url**, **save_request_date**, **save_request_status**,
        **save_task_status** and **visit_date**
    """
    pending_statuses = (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED)
    needs_saving = False
    visit_date = save_request.visit_date

    if task:
        # save task still in scheduler db
        save_task_status = _save_task_status[task["status"]]
        # Consider request from which a visit date has already been found
        # as succeeded to avoid retrieving it again
        if save_task_status == SAVE_TASK_SCHEDULED and visit_date:
            save_task_status = SAVE_TASK_SUCCEEDED
        task_ended = save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED)
        if task_ended and not visit_date:
            visit_date, _ = _get_visit_info_for_save_request(save_request)
            save_request.visit_date = visit_date
            needs_saving = True
    else:
        # save task may have been archived
        save_task_status = save_request.loading_task_status

    # Check tasks still marked as scheduled / not yet scheduled
    if save_task_status in pending_statuses:
        visit_date, save_task_status = _check_visit_update_status(
            save_request, save_task_status
        )

    if save_request.loading_task_status != save_task_status:
        save_request.loading_task_status = save_task_status
        needs_saving = True

    if needs_saving:
        save_request.save()

    return {
        "id": save_request.id,
        "visit_type": save_request.visit_type,
        "origin_url": save_request.origin_url,
        "save_request_date": save_request.request_date.isoformat(),
        "save_request_status": save_request.status,
        "save_task_status": save_task_status,
        "visit_date": visit_date.isoformat() if visit_date else None,
    }
def create_save_origin_request(visit_type, origin_url):
    """
    Create a loading task to save a software origin into the archive.

    The visit type and the origin url are validated first, then the origin
    url is checked against the blacklist and the whitelist to determine the
    status of the save request:

        * when accepted, a loading task is created through the
          swh-scheduler component unless a previously created one has not
          been executed yet
        * when pending, the request waits for a manual review
        * when rejected, an error is raised

    All the submitted save requests are logged into the swh-web
    database to keep track of them.

    Args:
        visit_type (str): the type of visit to perform (one of the values
            returned by :func:`get_savable_visit_types`)
        origin_url (str): the url of the origin to save

    Raises:
        BadInputExc: the visit type or origin url is invalid
        ForbiddenExc: the provided origin url is blacklisted

    Returns:
        dict: A dict describing the save request with the following keys:

            * **id**: the save request database identifier
            * **visit_type**: the type of visit to perform
            * **origin_url**: the url of the origin
            * **save_request_date**: the date the request was submitted
            * **save_request_status**: the request status, either **accepted**,
              **rejected** or **pending**
            * **save_task_status**: the origin loading task status (one of
              the ``SAVE_TASK_*`` values)
            * **visit_date**: the date of the associated visit, if any
    """
    _check_visit_type_savable(visit_type)
    _check_origin_url_valid(origin_url)
    save_request_status = can_save_origin(origin_url)
    task = None

    if save_request_status == SAVE_REQUEST_ACCEPTED:
        # the save request is accepted: a scheduler task with high
        # priority may have to be created to load the origin
        task_kwargs = {
            "priority": "high",
            "url": origin_url,
        }
        sor = None
        # previously submitted save requests for the same origin
        previous_requests = list(
            SaveOriginRequest.objects.filter(
                visit_type=visit_type, origin_url=origin_url
            )
        )
        can_create_task = False
        if not previous_requests:
            # first save request for that origin, create the scheduler task
            can_create_task = True
        else:
            # first request returned by the query (presumably the latest
            # submitted one, depending on the model default ordering)
            sor = previous_requests[0]
            if sor.status == SAVE_REQUEST_PENDING:
                # it was in pending state: the scheduler task must be created
                # and the save request info updated in the database
                can_create_task = True
            elif sor.loading_task_id != -1:
                # a task has already been created to load the origin,
                # get it from the scheduler along with its status
                tasks = scheduler.get_tasks([sor.loading_task_id])
                task = tasks[0] if tasks else None
                task_status = _save_request_dict(sor, task)["save_task_status"]
                # create a new scheduler task only if the previous one has
                # already been executed
                if task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED):
                    can_create_task = True
                    sor = None

        if can_create_task:
            # effectively create the scheduler task
            task_dict = create_oneshot_task_dict(
                _visit_type_task[visit_type], **task_kwargs
            )
            task = scheduler.create_tasks([task_dict])[0]

            if sor:
                # pending save request has been accepted
                sor.status = SAVE_REQUEST_ACCEPTED
                sor.loading_task_id = task["id"]
                sor.save()
            else:
                sor = SaveOriginRequest.objects.create(
                    visit_type=visit_type,
                    origin_url=origin_url,
                    status=save_request_status,
                    loading_task_id=task["id"],
                )
    elif save_request_status == SAVE_REQUEST_PENDING:
        # the save request must be manually reviewed: add it to the database
        # unless the same pending request has already been submitted
        try:
            sor = SaveOriginRequest.objects.get(
                visit_type=visit_type, origin_url=origin_url, status=save_request_status
            )
        except ObjectDoesNotExist:
            sor = SaveOriginRequest.objects.create(
                visit_type=visit_type, origin_url=origin_url, status=save_request_status
            )
    else:
        # origin can not be saved as its url is blacklisted,
        # log the request to the database anyway
        sor = SaveOriginRequest.objects.create(
            visit_type=visit_type, origin_url=origin_url, status=save_request_status
        )

    if save_request_status == SAVE_REQUEST_REJECTED:
        raise ForbiddenExc(
            (
                'The "save code now" request has been rejected '
                "because the provided origin url is blacklisted."
            )
        )

    return _save_request_dict(sor, task)
def get_save_origin_requests_from_queryset(requests_queryset):
    """
    Serialize every save request of a SaveOriginRequest queryset.

    The scheduler tasks of all the requests are fetched with a single call.

    Args:
        requests_queryset (django.db.models.QuerySet): input
            SaveOriginRequest queryset

    Returns:
        list: A list of save origin requests dict as described in
        :func:`swh.web.common.origin_save.create_save_origin_request`
    """
    task_ids = [sor.loading_task_id for sor in requests_queryset]
    if not task_ids:
        return []

    tasks_by_id = {task["id"]: task for task in scheduler.get_tasks(task_ids)}
    # requests whose task is unknown to the scheduler get ``None`` as task
    return [
        _save_request_dict(sor, tasks_by_id.get(sor.loading_task_id))
        for sor in requests_queryset
    ]
def get_save_origin_requests(visit_type, origin_url):
    """
    Return the save requests submitted for a given software origin.

    Args:
        visit_type (str): the type of visit
        origin_url (str): the url of the origin

    Raises:
        BadInputExc: the visit type or origin url is invalid
        swh.web.common.exc.NotFoundExc: no save requests can be found for the
            given origin

    Returns:
        list: A list of save origin requests dict as described in
        :func:`swh.web.common.origin_save.create_save_origin_request`
    """
    _check_visit_type_savable(visit_type)
    _check_origin_url_valid(origin_url)

    matching_requests = SaveOriginRequest.objects.filter(
        visit_type=visit_type, origin_url=origin_url
    )
    if matching_requests.count() == 0:
        error_msg = (
            "No save requests found for visit of type %s on origin with url %s."
            % (visit_type, origin_url)
        )
        raise NotFoundExc(error_msg)

    return get_save_origin_requests_from_queryset(matching_requests)
def _hide_staff_only_task_run_info(task_run: Dict[str, Any]) -> Dict[str, Any]:
    """Remove in place the info of a task run dict reserved to staff users."""
    for field in ("id", "backend_id", "worker"):
        task_run.pop(field, None)

    if "message" in task_run and "Loading failure" in task_run["message"]:
        # hide traceback for non staff users, only display exception
        message_lines = task_run["message"].split("\n")
        for index, line in enumerate(message_lines):
            if line.startswith("Traceback"):
                # keep the lines preceding the traceback and the last line
                head = "".join(f"{kept}\n" for kept in message_lines[:index])
                task_run["message"] = head + message_lines[-1]
                break
        # without any traceback the message is left untouched

    return task_run


def get_save_origin_task_info(
    save_request_id: int, full_info: bool = True
) -> Dict[str, Any]:
    """
    Get detailed information about an accepted save origin request
    and its associated loading task.

    An empty dictionary is returned when the save request does not exist,
    when its loading task is no longer available from the scheduler
    (archived) or when the task has not been run.

    Args:
        save_request_id: identifier of a save origin request
        full_info: whether to return detailed info for staff users; when
            false the **id**, **backend_id** and **worker** keys are removed
            and the traceback of loading failures is hidden, whether or not
            task logs could be fetched

    Returns:
        A dictionary with the following keys:

            - **type**: loading task type
            - **arguments**: loading task arguments
            - **id**: loading task database identifier
            - **backend_id**: loading task celery identifier
            - **scheduled**: loading task scheduling date
            - **ended**: loading task termination date
            - **status**: loading task execution status

        Depending on the availability of the task logs in the elasticsearch
        cluster of Software Heritage, the returned dictionary may also
        contain the following keys:

            - **name**: associated celery task name
            - **message**: relevant log message from task execution
            - **duration**: task execution time (only if it succeeded)
            - **worker**: name of the worker that executed the task
    """
    try:
        save_request = SaveOriginRequest.objects.get(id=save_request_id)
    except ObjectDoesNotExist:
        return {}

    task = scheduler.get_tasks([save_request.loading_task_id])
    task = task[0] if task else None
    if task is None:
        return {}

    task_run = scheduler.get_task_runs([task["id"]])
    task_run = task_run[0] if task_run else None
    if task_run is None:
        return {}

    # merge task info into the task run dict and expose the task id as "id"
    task_run["type"] = task["type"]
    task_run["arguments"] = task["arguments"]
    task_run["id"] = task_run["task"]
    del task_run["task"]
    del task_run["metadata"]

    es_workers_index_url = config.get_config()["es_workers_index_url"]
    if es_workers_index_url:
        es_workers_index_url += "/_search"

        # restrict the log search to a time window starting at the visit
        # date when known, at the request date otherwise
        if save_request.visit_date:
            min_ts = save_request.visit_date
            max_ts = min_ts + timedelta(days=7)
        else:
            min_ts = save_request.request_date
            max_ts = min_ts + timedelta(days=30)
        min_ts_unix = int(min_ts.timestamp()) * 1000
        max_ts_unix = int(max_ts.timestamp()) * 1000

        save_task_status = _save_task_status[task["status"]]
        # log priority to search for: "3" for failed tasks, "6" otherwise
        priority = "3" if save_task_status == SAVE_TASK_FAILED else "6"

        query = {
            "bool": {
                "must": [
                    {"match_phrase": {"priority": {"query": priority}}},
                    {
                        "match_phrase": {
                            "swh_task_id": {"query": task_run["backend_id"]}
                        }
                    },
                    {
                        "range": {
                            "@timestamp": {
                                "gte": min_ts_unix,
                                "lte": max_ts_unix,
                                "format": "epoch_millis",
                            }
                        }
                    },
                ]
            }
        }

        try:
            response = requests.post(
                es_workers_index_url,
                json={"query": query, "sort": ["@timestamp"]},
                timeout=30,
            )
            results = json.loads(response.text)
            if results["hits"]["total"]["value"] >= 1:
                # hits are sorted by timestamp, use the latest one
                task_run_info = results["hits"]["hits"][-1]["_source"]
                if "swh_logging_args_runtime" in task_run_info:
                    duration = task_run_info["swh_logging_args_runtime"]
                    task_run["duration"] = duration
                if "message" in task_run_info:
                    task_run["message"] = task_run_info["message"]
                if "swh_logging_args_name" in task_run_info:
                    task_run["name"] = task_run_info["swh_logging_args_name"]
                elif "swh_task_name" in task_run_info:
                    task_run["name"] = task_run_info["swh_task_name"]
                if "hostname" in task_run_info:
                    task_run["worker"] = task_run_info["hostname"]
                elif "host" in task_run_info:
                    task_run["worker"] = task_run_info["host"]
        except Exception as exc:
            # task logs are optional: report the error and carry on
            logger.warning("Request to Elasticsearch failed\n%s", exc)
            sentry_sdk.capture_exception(exc)

    # staff only info must be removed even when no task logs were fetched
    if not full_info:
        _hide_staff_only_task_run_info(task_run)

    return task_run
# Name of the Prometheus metric counting the submitted save requests
SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests"
# Gauge labelled by save request status and visit type, registered in the
# swh-web metrics registry
_submitted_save_requests_gauge = Gauge(
name=SUBMITTED_SAVE_REQUESTS_METRIC,
documentation="Number of submitted origin save requests",
labelnames=["status", "visit_type"],
registry=SWH_WEB_METRICS_REGISTRY,
)
# Name of the Prometheus metric counting the accepted save requests
ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests"
# Gauge labelled by loading task status and visit type, registered in the
# swh-web metrics registry
_accepted_save_requests_gauge = Gauge(
name=ACCEPTED_SAVE_REQUESTS_METRIC,
documentation="Number of accepted origin save requests",
labelnames=["load_task_status", "visit_type"],
registry=SWH_WEB_METRICS_REGISTRY,
)
def compute_save_requests_metrics():
    """Compute a couple of Prometheus metrics related to
    origin save requests.

    Both gauges are first reset to zero for every known combination of
    labels, then incremented while iterating on all the save requests:
    every request is counted as submitted, accepted ones are also counted
    according to their loading task status.
    """
    visit_types = get_savable_visit_types()
    request_statuses = (
        SAVE_REQUEST_ACCEPTED,
        SAVE_REQUEST_REJECTED,
        SAVE_REQUEST_PENDING,
    )
    load_task_statuses = (
        SAVE_TASK_NOT_CREATED,
        SAVE_TASK_NOT_YET_SCHEDULED,
        SAVE_TASK_SCHEDULED,
        SAVE_TASK_SUCCEEDED,
        SAVE_TASK_FAILED,
        SAVE_TASK_RUNNING,
    )

    for request_status, visit_type in product(request_statuses, visit_types):
        _submitted_save_requests_gauge.labels(request_status, visit_type).set(0)

    for load_task_status, visit_type in product(load_task_statuses, visit_types):
        _accepted_save_requests_gauge.labels(load_task_status, visit_type).set(0)

    for save_request in SaveOriginRequest.objects.all():
        if save_request.status == SAVE_REQUEST_ACCEPTED:
            _accepted_save_requests_gauge.labels(
                load_task_status=save_request.loading_task_status,
                visit_type=save_request.visit_type,
            ).inc()

        _submitted_save_requests_gauge.labels(
            status=save_request.status, visit_type=save_request.visit_type
        ).inc()
diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py
index c3c20ddb..76318099 100644
--- a/swh/web/tests/api/views/test_origin_save.py
+++ b/swh/web/tests/api/views/test_origin_save.py
@@ -1,321 +1,321 @@
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timedelta
import pytest
from django.utils import timezone
from swh.web.common.models import (
SAVE_REQUEST_ACCEPTED,
SAVE_REQUEST_PENDING,
SAVE_REQUEST_REJECTED,
SAVE_TASK_FAILED,
SAVE_TASK_NOT_CREATED,
SAVE_TASK_NOT_YET_SCHEDULED,
SAVE_TASK_SCHEDULED,
- SAVE_TASK_SUCCEED,
+ SAVE_TASK_SUCCEEDED,
SaveOriginRequest,
SaveUnauthorizedOrigin,
)
from swh.web.common.utils import reverse
from swh.web.tests.api.views import check_api_get_responses, check_api_post_responses
# apply the django_db marker to every test of this module
pytestmark = pytest.mark.django_db
@pytest.fixture(autouse=True)
def populated_db():
    """Blacklist two origin url prefixes for every test of the module."""
    for blacklisted_url in (
        "https://github.com/user/illegal_repo",
        "https://gitlab.com/user_to_exclude",
    ):
        SaveUnauthorizedOrigin.objects.create(url=blacklisted_url)
def test_invalid_visit_type(api_client):
    """Requesting save requests for visit type ``foo`` must give a 400."""
    url_args = {
        "visit_type": "foo",
        "origin_url": "https://github.com/torvalds/linux",
    }
    url = reverse("api-1-save-origin", url_args=url_args)
    check_api_get_responses(api_client, url, status_code=400)
def test_invalid_origin_url(api_client):
    """Requesting save requests for origin url ``bar`` must give a 400."""
    url_args = {"visit_type": "git", "origin_url": "bar"}
    url = reverse("api-1-save-origin", url_args=url_args)
    check_api_get_responses(api_client, url, status_code=400)
def check_created_save_request_status(
    api_client,
    mocker,
    origin_url,
    scheduler_task_status,
    expected_request_status,
    expected_task_status=None,
    visit_date=None,
):
    """
    Submit a save request through the API with a mocked scheduler and
    check the statuses found in the response.

    Args:
        api_client: client used to send the POST request
        mocker: pytest-mock fixture
        origin_url (str): url of the origin to save
        scheduler_task_status (str): status of the task returned by the
            mocked scheduler, no task is returned when it is falsy
        expected_request_status (str): expected save request status, a 403
            response is expected when it is ``SAVE_REQUEST_REJECTED``
        expected_task_status (str): expected save task status
        visit_date: visit date returned by the mocked visit lookup
    """

    def _scheduler_task(status):
        return {
            "priority": "high",
            "policy": "oneshot",
            "type": "load-git",
            "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
            "status": status,
            "id": 1,
        }

    mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
    existing_tasks = (
        [_scheduler_task(scheduler_task_status)] if scheduler_task_status else []
    )
    mock_scheduler.get_tasks.return_value = existing_tasks
    # newly created tasks are always reported as not yet scheduled
    mock_scheduler.create_tasks.return_value = [
        _scheduler_task("next_run_not_scheduled")
    ]

    url = reverse(
        "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}
    )

    mock_visit_date = mocker.patch(
        "swh.web.common.origin_save._get_visit_info_for_save_request"
    )
    mock_visit_date.return_value = (visit_date, None)

    if expected_request_status == SAVE_REQUEST_REJECTED:
        check_api_post_responses(api_client, url, data=None, status_code=403)
        return

    response = check_api_post_responses(api_client, url, data=None, status_code=200)
    assert response.data["save_request_status"] == expected_request_status
    assert response.data["save_task_status"] == expected_task_status
def check_save_request_status(
    api_client,
    mocker,
    origin_url,
    expected_request_status,
    expected_task_status,
    scheduler_task_status="next_run_not_scheduled",
    visit_date=None,
):
    """
    Get the save requests of an origin through the API with a mocked
    scheduler and check the statuses of the first one.

    The save task status is checked a second time after making the mocked
    scheduler return no task.

    Args:
        api_client: client used to send the GET requests
        mocker: pytest-mock fixture
        origin_url (str): url of the origin
        expected_request_status (str): expected save request status
        expected_task_status (str): expected save task status
        scheduler_task_status (str): status of the task returned by the
            mocked scheduler
        visit_date: visit date returned by the mocked visit lookup
    """
    scheduler_task = {
        "priority": "high",
        "policy": "oneshot",
        "type": "load-git",
        "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
        "status": scheduler_task_status,
        "id": 1,
    }
    mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
    mock_scheduler.get_tasks.return_value = [scheduler_task]

    url = reverse(
        "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}
    )

    mock_visit_date = mocker.patch(
        "swh.web.common.origin_save._get_visit_info_for_save_request"
    )
    mock_visit_date.return_value = (visit_date, None)

    first_request = check_api_get_responses(api_client, url, status_code=200).data[0]
    assert first_request["save_request_status"] == expected_request_status
    assert first_request["save_task_status"] == expected_task_status

    # Check that save task status is still available when
    # the scheduler task has been archived
    mock_scheduler.get_tasks.return_value = []
    first_request = check_api_get_responses(api_client, url, status_code=200).data[0]
    assert first_request["save_task_status"] == expected_task_status
def test_save_request_rejected(api_client, mocker):
    """A save request targeting a blacklisted origin url is rejected."""
    origin_url = "https://github.com/user/illegal_repo"
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        scheduler_task_status=None,
        expected_request_status=SAVE_REQUEST_REJECTED,
    )
    check_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_REJECTED,
        expected_task_status=SAVE_TASK_NOT_CREATED,
    )
def test_save_request_pending(api_client, mocker):
    """A save request for this origin url is expected to be pending,
    without any loading task created."""
    origin_url = "https://unkwownforge.com/user/repo"
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        scheduler_task_status=None,
        expected_request_status=SAVE_REQUEST_PENDING,
        expected_task_status=SAVE_TASK_NOT_CREATED,
    )
    check_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_PENDING,
        expected_task_status=SAVE_TASK_NOT_CREATED,
    )
def test_save_request_succeed(api_client, mocker):
    """An accepted save request follows the scheduler task status up to
    the succeeded state, with or without a visit date found."""
    origin_url = "https://github.com/Kitware/CMake"
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        scheduler_task_status=None,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
    )
    check_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_SCHEDULED,
        scheduler_task_status="next_run_scheduled",
    )
    # task completed but no visit date found
    check_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_SUCCEEDED,
        scheduler_task_status="completed",
        visit_date=None,
    )
    # task completed and visit date found
    visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1)
    check_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_SUCCEEDED,
        scheduler_task_status="completed",
        visit_date=visit_date,
    )
def test_save_request_failed(api_client, mocker):
    """An accepted save request is reported as failed once its scheduler
    task is disabled."""
    origin_url = "https://gitlab.com/inkscape/inkscape"
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        scheduler_task_status=None,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
    )
    check_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_SCHEDULED,
        scheduler_task_status="next_run_scheduled",
    )
    check_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_FAILED,
        scheduler_task_status="disabled",
    )
def test_create_save_request_only_when_needed(api_client, mocker):
    """New save requests are only recorded once the loading task of the
    previous one has been executed (completed or disabled)."""
    origin_url = "https://github.com/webpack/webpack"
    SaveOriginRequest.objects.create(
        visit_type="git",
        origin_url=origin_url,
        status=SAVE_REQUEST_ACCEPTED,
        loading_task_id=56,
    )

    def nb_save_requests():
        return len(
            list(
                SaveOriginRequest.objects.filter(
                    visit_type="git", origin_url=origin_url
                )
            )
        )

    # previous task not yet scheduled: no new save request
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        scheduler_task_status="next_run_not_scheduled",
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
    )
    assert nb_save_requests() == 1

    # previous task scheduled: no new save request
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        scheduler_task_status="next_run_scheduled",
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_SCHEDULED,
    )
    assert nb_save_requests() == 1

    # previous task completed: new save requests are created
    visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1)
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        scheduler_task_status="completed",
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
        visit_date=visit_date,
    )
    # check_api_post_responses sends two POST requests to check YAML and JSON response
    assert nb_save_requests() == 3

    # previous task disabled: new save requests are created
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        scheduler_task_status="disabled",
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
    )
    assert nb_save_requests() == 5
def test_get_save_requests_unknown_origin(api_client):
    """Getting the save requests of an origin without any must give a 404
    along with a ``NotFoundExc`` error payload."""
    unknown_origin_url = "https://gitlab.com/foo/bar"
    url_args = {"visit_type": "git", "origin_url": unknown_origin_url}
    url = reverse("api-1-save-origin", url_args=url_args)

    response = check_api_get_responses(api_client, url, status_code=404)

    expected_reason = (
        "No save requests found for visit of type git on origin with url %s."
        % unknown_origin_url
    )
    assert response.data == {
        "exception": "NotFoundExc",
        "reason": expected_reason,
    }
diff --git a/swh/web/tests/common/test_origin_save.py b/swh/web/tests/common/test_origin_save.py
index a64cf066..8f7ebd2a 100644
--- a/swh/web/tests/common/test_origin_save.py
+++ b/swh/web/tests/common/test_origin_save.py
@@ -1,264 +1,262 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timedelta, timezone
from functools import partial
import re
import pytest
import requests
from swh.core.pytest_plugin import get_response_cb
from swh.web.common.models import SaveOriginRequest
from swh.web.common.origin_save import (
get_save_origin_requests,
get_save_origin_task_info,
)
from swh.web.common.typing import OriginVisitInfo
from swh.web.config import get_config
# Elasticsearch node url used to build the workers index url below
_es_url = "http://esnode1.internal.softwareheritage.org:9200"
# url set as "es_workers_index_url" in the swh-web config by the tests
_es_workers_index_url = "%s/swh_workers-*" % _es_url
# origin, visit type and scheduler task id shared by the tests of this module
_origin_url = "https://gitlab.com/inkscape/inkscape"
_visit_type = "git"
_task_id = 203525448
@pytest.fixture(autouse=True)
def requests_mock_datadir(datadir, requests_mock_datadir):
    """Extend the overridden fixture to also mock POST requests.

    Responses to POST requests sent to any http(s) url are produced by
    ``get_response_cb`` bound to the tests data directory.
    """
    response_callback = partial(get_response_cb, datadir=datadir)
    any_http_url = re.compile("https?://")
    requests_mock_datadir.post(any_http_url, body=response_callback)
    return requests_mock_datadir
@pytest.mark.django_db
def test_get_save_origin_archived_task_info(mocker):
    """Task info retrieval when the scheduler task has been archived."""
    _get_save_origin_task_info_test(
        mocker, task_archived=True, es_available=True, full_info=True
    )
@pytest.mark.django_db
def test_get_save_origin_task_full_info_with_es(mocker):
    """Full task info retrieval with an Elasticsearch url configured."""
    _get_save_origin_task_info_test(
        mocker, task_archived=False, es_available=True, full_info=True
    )
@pytest.mark.django_db
def test_get_save_origin_task_info_with_es(mocker):
    """Restricted task info retrieval with an Elasticsearch url configured."""
    _get_save_origin_task_info_test(
        mocker, task_archived=False, es_available=True, full_info=False
    )
@pytest.mark.django_db
def test_get_save_origin_task_info_without_es(mocker):
    """Full task info retrieval without any Elasticsearch url configured."""
    _get_save_origin_task_info_test(
        mocker, task_archived=False, es_available=False, full_info=True
    )
def _mock_scheduler(mocker, task_status="succeeded", task_archived=False):
    """
    Patch the scheduler used by ``swh.web.common.origin_save``.

    Args:
        mocker: pytest-mock fixture
        task_status (str): status of the mocked task run (the status of the
            mocked task itself is always ``disabled``)
        task_archived (bool): when true, the mocked scheduler returns
            ``None`` in place of the task

    Returns:
        tuple: the ``(task, task_run)`` dicts the mocked scheduler returns
        copies of, ``task`` being ``None`` when archived
    """
    mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")

    task = None
    if not task_archived:
        task = {
            "arguments": {"args": [], "kwargs": {"repo_url": _origin_url}},
            "current_interval": timedelta(days=64),
            "id": _task_id,
            "next_run": datetime.now(tz=timezone.utc) + timedelta(days=64),
            "policy": "oneshot",
            "priority": "high",
            "retries_left": 0,
            "status": "disabled",
            "type": "load-git",
        }
    mock_scheduler.get_tasks.return_value = [dict(task) if task else None]

    task_run = {
        "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205",
        "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5),
        "id": 654270631,
        "metadata": {},
        "scheduled": datetime.now(tz=timezone.utc),
        "started": None,
        "status": task_status,
        "task": _task_id,
    }
    # a copy is returned by the mock, leaving the returned dict untouched
    mock_scheduler.get_task_runs.return_value = [dict(task_run)]

    return task, task_run
def _get_save_origin_task_info_test(
    mocker, task_archived=False, es_available=True, full_info=True
):
    """
    Check the result of ``get_save_origin_task_info`` for an accepted
    save request with a mocked scheduler.

    Args:
        mocker: pytest-mock fixture
        task_archived (bool): whether the scheduler task is archived, an
            empty dict is expected in that case
        es_available (bool): whether an Elasticsearch url is set in the
            swh-web configuration
        full_info (bool): forwarded to ``get_save_origin_task_info``
    """
    es_index_url = _es_workers_index_url if es_available else ""
    get_config().update({"es_workers_index_url": es_index_url})

    sor = SaveOriginRequest.objects.create(
        request_date=datetime.now(tz=timezone.utc),
        visit_type=_visit_type,
        origin_url="https://gitlab.com/inkscape/inkscape",
        status="accepted",
        visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1),
        loading_task_id=_task_id,
    )

    task, task_run = _mock_scheduler(mocker, task_archived=task_archived)

    # log entry the function under test is expected to pick (latest hit)
    es_response = requests.post("%s/_search" % _es_workers_index_url).json()
    task_exec_data = es_response["hits"]["hits"][-1]["_source"]

    sor_task_info = get_save_origin_task_info(sor.id, full_info=full_info)

    if task_archived:
        expected_result = {}
    else:
        expected_result = {
            "type": task["type"],
            "arguments": task["arguments"],
            "id": task["id"],
            "backend_id": task_run["backend_id"],
            "scheduled": task_run["scheduled"],
            "started": task_run["started"],
            "ended": task_run["ended"],
            "status": task_run["status"],
        }
        if es_available:
            expected_result.update(
                {
                    "message": task_exec_data["message"],
                    "name": task_exec_data["swh_task_name"],
                    "worker": task_exec_data["hostname"],
                }
            )

    if not full_info:
        # staff only fields are expected to be removed
        for staff_only_field in ("id", "backend_id", "worker"):
            expected_result.pop(staff_only_field, None)

        if "message" in expected_result:
            # the traceback is expected to be stripped from the message
            message_lines = expected_result["message"].split("\n")
            message = ""
            for line in message_lines:
                if line.startswith("Traceback"):
                    break
                message += f"{line}\n"
            message += message_lines[-1]
            expected_result["message"] = message

    assert sor_task_info == expected_result
@pytest.mark.django_db
def test_get_save_origin_requests_find_visit_date(mocker):
    """The visit date of a save request is looked up once, and not at all
    for requests older than one month."""
    # create a save request
    SaveOriginRequest.objects.create(
        request_date=datetime.now(tz=timezone.utc),
        visit_type=_visit_type,
        origin_url=_origin_url,
        status="accepted",
        visit_date=None,
        loading_task_id=_task_id,
    )

    # mock scheduler and services
    _mock_scheduler(mocker)
    mock_service = mocker.patch("swh.web.common.origin_save.service")
    mock_service.lookup_origin.return_value = {"url": _origin_url}
    mock_get_origin_visits = mocker.patch(
        "swh.web.common.origin_save.get_origin_visits"
    )

    # create a visit for the save request
    visit_date = datetime.now(tz=timezone.utc).isoformat()
    mock_get_origin_visits.return_value = [
        OriginVisitInfo(
            date=visit_date,
            formatted_date="",
            metadata={},
            origin=_origin_url,
            snapshot="",
            status="full",
            type=_visit_type,
            url="",
            visit=34,
        )
    ]

    # check visit date has been correctly found
    save_requests = get_save_origin_requests(_visit_type, _origin_url)
    assert len(save_requests) == 1
    assert save_requests[0]["visit_date"] == visit_date
    mock_get_origin_visits.assert_called_once()

    # check visit is not searched again when it has been found
    get_save_origin_requests(_visit_type, _origin_url)
    mock_get_origin_visits.assert_called_once()

    # check visit date are not searched for save requests older than
    # one month
    old_request = SaveOriginRequest.objects.create(
        visit_type=_visit_type,
        origin_url=_origin_url,
        status="accepted",
        loading_task_id=_task_id,
        visit_date=None,
    )
    old_request.request_date = datetime.now(tz=timezone.utc) - timedelta(days=31)
    old_request.save()

    _mock_scheduler(mocker, task_status="failed")

    save_requests = get_save_origin_requests(_visit_type, _origin_url)
    assert len(save_requests) == 2
    assert save_requests[0]["visit_date"] is None
    mock_get_origin_visits.assert_called_once()
@pytest.mark.django_db
def test_get_save_origin_requests_no_visit_date_found(mocker):
    """No visit date is reported when the only visit found has the
    ``created`` status."""
    # create a save request
    SaveOriginRequest.objects.create(
        request_date=datetime.now(tz=timezone.utc),
        visit_type=_visit_type,
        origin_url=_origin_url,
        status="accepted",
        visit_date=None,
        loading_task_id=_task_id,
    )

    # mock scheduler and services
    _mock_scheduler(mocker)
    mock_service = mocker.patch("swh.web.common.origin_save.service")
    mock_service.lookup_origin.return_value = {"url": _origin_url}
    mock_get_origin_visits = mocker.patch(
        "swh.web.common.origin_save.get_origin_visits"
    )

    # create a visit for the save request with status created
    visit_date = datetime.now(tz=timezone.utc).isoformat()
    mock_get_origin_visits.return_value = [
        OriginVisitInfo(
            date=visit_date,
            formatted_date="",
            metadata={},
            origin=_origin_url,
            snapshot=None,
            status="created",
            type=_visit_type,
            url="",
            visit=34,
        )
    ]

    # check no visit date has been found
    save_requests = get_save_origin_requests(_visit_type, _origin_url)
    assert len(save_requests) == 1
    assert save_requests[0]["visit_date"] is None
    mock_get_origin_visits.assert_called_once()
diff --git a/swh/web/tests/misc/test_metrics.py b/swh/web/tests/misc/test_metrics.py
index 57d0458f..8eef15cc 100644
--- a/swh/web/tests/misc/test_metrics.py
+++ b/swh/web/tests/misc/test_metrics.py
@@ -1,99 +1,99 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from itertools import product
import random
from prometheus_client.exposition import CONTENT_TYPE_LATEST
import pytest
from swh.web.common.models import (
SAVE_REQUEST_ACCEPTED,
SAVE_REQUEST_PENDING,
SAVE_REQUEST_REJECTED,
SAVE_TASK_FAILED,
SAVE_TASK_NOT_CREATED,
SAVE_TASK_NOT_YET_SCHEDULED,
SAVE_TASK_RUNNING,
SAVE_TASK_SCHEDULED,
- SAVE_TASK_SUCCEED,
+ SAVE_TASK_SUCCEEDED,
SaveOriginRequest,
)
from swh.web.common.origin_save import (
ACCEPTED_SAVE_REQUESTS_METRIC,
SUBMITTED_SAVE_REQUESTS_METRIC,
get_savable_visit_types,
)
from swh.web.common.utils import reverse
from swh.web.tests.django_asserts import assert_contains
@pytest.mark.django_db
def test_origin_save_metrics(client):
    """The Prometheus endpoint exposes save requests counts matching the
    content of the database for every combination of labels."""
    visit_types = get_savable_visit_types()
    request_statuses = (
        SAVE_REQUEST_ACCEPTED,
        SAVE_REQUEST_REJECTED,
        SAVE_REQUEST_PENDING,
    )
    load_task_statuses = (
        SAVE_TASK_NOT_CREATED,
        SAVE_TASK_NOT_YET_SCHEDULED,
        SAVE_TASK_SCHEDULED,
        SAVE_TASK_SUCCEEDED,
        SAVE_TASK_FAILED,
        SAVE_TASK_RUNNING,
    )

    # fill the database with a random number of random save requests
    nb_requests = random.randint(50, 100)
    for _ in range(nb_requests):
        visit_type = random.choice(visit_types)
        request_status = random.choice(request_statuses)
        load_task_status = random.choice(load_task_statuses)
        SaveOriginRequest.objects.create(
            origin_url="origin",
            visit_type=visit_type,
            status=request_status,
            loading_task_status=load_task_status,
        )

    resp = client.get(reverse("metrics-prometheus"))

    assert resp.status_code == 200
    assert resp["Content-Type"] == CONTENT_TYPE_LATEST

    accepted_requests = SaveOriginRequest.objects.filter(status=SAVE_REQUEST_ACCEPTED)

    for visit_type, load_task_status in product(visit_types, load_task_statuses):
        sor_count = accepted_requests.filter(
            visit_type=visit_type, loading_task_status=load_task_status
        ).count()
        metric_text = (
            f"{ACCEPTED_SAVE_REQUESTS_METRIC}{{"
            f'load_task_status="{load_task_status}",'
            f'visit_type="{visit_type}"}} {float(sor_count)}\n'
        )
        assert_contains(resp, metric_text)

    for visit_type, request_status in product(visit_types, request_statuses):
        sor_count = SaveOriginRequest.objects.filter(
            visit_type=visit_type, status=request_status
        ).count()
        metric_text = (
            f"{SUBMITTED_SAVE_REQUESTS_METRIC}{{"
            f'status="{request_status}",'
            f'visit_type="{visit_type}"}} {float(sor_count)}\n'
        )
        assert_contains(resp, metric_text)