diff --git a/swh/web/admin/origin_save.py b/swh/web/admin/origin_save.py index e72c220e..4802f6bb 100644 --- a/swh/web/admin/origin_save.py +++ b/swh/web/admin/origin_save.py @@ -1,173 +1,188 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.conf import settings from django.contrib.admin.views.decorators import staff_member_required from django.core.exceptions import ObjectDoesNotExist from django.core.paginator import Paginator from django.http import HttpResponse from django.shortcuts import render from django.views.decorators.http import require_POST from swh.web.admin.adminurls import admin_route from swh.web.common.models import ( SaveAuthorizedOrigin, SaveUnauthorizedOrigin, SaveOriginRequest ) from swh.web.common.origin_save import ( create_save_origin_request, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED ) @admin_route(r'origin/save/', view_name='admin-origin-save') @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save(request): return render(request, 'admin/origin-save.html') def _datatables_origin_urls_response(request, urls_query_set): search_value = request.GET['search[value]'] if search_value: urls_query_set = urls_query_set.filter(url__icontains=search_value) column_order = request.GET['order[0][column]'] field_order = request.GET['columns[%s][name]' % column_order] order_dir = request.GET['order[0][dir]'] if order_dir == 'desc': field_order = '-' + field_order urls_query_set = urls_query_set.order_by(field_order) table_data = {} table_data['draw'] = int(request.GET['draw']) table_data['recordsTotal'] = urls_query_set.count() table_data['recordsFiltered'] = urls_query_set.count() length = int(request.GET['length']) page = int(request.GET['start']) / length + 1 paginator = Paginator(urls_query_set, length) urls_query_set = paginator.page(page).object_list table_data['data'] = [{'url': u.url} for u in urls_query_set] table_data_json = json.dumps(table_data, separators=(',', ': ')) return HttpResponse(table_data_json, content_type='application/json') @admin_route(r'origin/save/authorized_urls/list/', view_name='admin-origin-save-authorized-urls-list') @staff_member_required def _admin_origin_save_authorized_urls_list(request): authorized_urls = SaveAuthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, authorized_urls) @admin_route(r'origin/save/authorized_urls/add/(?P.+)/', view_name='admin-origin-save-add-authorized-url') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_add_authorized_url(request, origin_url): try: SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: # add the new authorized url SaveAuthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = \ SaveOriginRequest.objects.filter(origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING) # create origin save tasks for previously pending requests for psr in pending_save_requests: create_save_origin_request(psr.origin_type, psr.origin_url) status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) @admin_route(r'origin/save/authorized_urls/remove/(?P.+)/', view_name='admin-origin-save-remove-authorized-url') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_remove_authorized_url(request, origin_url): try: entry = SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route(r'origin/save/unauthorized_urls/list/', view_name='admin-origin-save-unauthorized-urls-list') @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_unauthorized_urls_list(request): unauthorized_urls = SaveUnauthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, unauthorized_urls) @admin_route(r'origin/save/unauthorized_urls/add/(?P.+)/', view_name='admin-origin-save-add-unauthorized-url') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_add_unauthorized_url(request, origin_url): try: SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = \ SaveOriginRequest.objects.filter(origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING) # mark pending requests as rejected for psr in pending_save_requests: psr.status = SAVE_REQUEST_REJECTED psr.save() status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) @admin_route(r'origin/save/unauthorized_urls/remove/(?P.+)/', view_name='admin-origin-save-remove-unauthorized-url') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_remove_unauthorized_url(request, origin_url): try: entry = SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route(r'origin/save/request/accept/(?P.+)/url/(?P.+)/', # noqa view_name='admin-origin-save-request-accept') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_request_accept(request, origin_type, origin_url): SaveAuthorizedOrigin.objects.create(url=origin_url) create_save_origin_request(origin_type, origin_url) return HttpResponse(status=200) @admin_route(r'origin/save/request/reject/(?P.+)/url/(?P.+)/', # noqa view_name='admin-origin-save-request-reject') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_request_reject(request, origin_type, origin_url): SaveUnauthorizedOrigin.objects.create(url=origin_url) sor = SaveOriginRequest.objects.get(origin_type=origin_type, origin_url=origin_url, status=SAVE_REQUEST_PENDING) sor.status = SAVE_REQUEST_REJECTED sor.save() return HttpResponse(status=200) + + +@admin_route(r'origin/save/request/remove/(?P.+)/', + view_name='admin-origin-save-request-remove') +@require_POST +@staff_member_required(login_url=settings.LOGIN_URL) +def _admin_origin_save_request_remove(request, sor_id): + try: + entry = SaveOriginRequest.objects.get(id=sor_id) + except ObjectDoesNotExist: + status_code = 404 + else: + entry.delete() + status_code = 200 + return HttpResponse(status=status_code) diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py index 7c74324f..b5a8d9f6 100644 --- a/swh/web/api/views/origin_save.py +++ b/swh/web/api/views/origin_save.py @@ -1,79 +1,83 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.views.decorators.cache import never_cache from swh.web.api.apidoc import api_doc from swh.web.api.apiurls import api_route from swh.web.common.origin_save import ( create_save_origin_request, get_save_origin_requests ) @api_route(r'/origin/save/(?P.+)/url/(?P.+)/', 'api-save-origin', methods=['GET', 'POST'], throttle_scope='swh_save_origin') @never_cache @api_doc('/origin/save/') def api_save_origin(request, origin_type, origin_url): """ .. http:get:: /api/1/origin/save/(origin_type)/url/(origin_url)/ .. http:post:: /api/1/origin/save/(origin_type)/url/(origin_url)/ Request the saving of a software origin into the archive or check the status of previously created save requests. That endpoint enables to create a saving task for a software origin through a POST request. Depending of the provided origin url, the save request can either be: * immediately **accepted**, for well known code hosting providers like for instance GitHub or GitLab * **rejected**, in case the url is blacklisted by Software Heritage * **put in pending state** until a manual check is done in order to determine if it can be loaded or not Once a saving request has been accepted, its associated saving task status can then be checked through a GET request on the same url. Returned status can either be: * **not created**: no saving task has been created * **not yet scheduled**: saving task has been created but its execution has not yet been scheduled * **scheduled**: the task execution has been scheduled * **succeed**: the saving task has been successfully executed * **failed**: the saving task has been executed but it failed When issuing a POST request an object will be returned while a GET request will return an array of objects (as multiple save requests might have been submitted for the same origin). :param string origin_type: the type of origin to save (currently only ``git`` but ``hg`` and ``svn`` will soon be available) :param string origin_url: the url of the origin to save :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json string origin_url: the url of the origin to save :>json string origin_type: the type of the origin to save :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either **accepted**, **rejected** or **pending** :>json string save_task_status: the status of the origin saving task, either **not created**, **not yet scheduled**, **scheduled**, **succeed** or **failed** **Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid origin type or url has been provided :statuscode 403: the provided origin url is blacklisted """ # noqa if request.method == 'POST': - return create_save_origin_request(origin_type, origin_url) + sor = create_save_origin_request(origin_type, origin_url) + del sor['id'] else: - return get_save_origin_requests(origin_type, origin_url) + sor = get_save_origin_requests(origin_type, origin_url) + for s in sor: del s['id'] # noqa + + return sor diff --git a/swh/web/assets/src/bundles/admin/origin-save.js b/swh/web/assets/src/bundles/admin/origin-save.js index 0f876b4d..3bd4f36f 100644 --- a/swh/web/assets/src/bundles/admin/origin-save.js +++ b/swh/web/assets/src/bundles/admin/origin-save.js @@ -1,256 +1,294 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {handleFetchError, csrfPost} from 'utils/functions'; let authorizedOriginTable; let unauthorizedOriginTable; let pendingSaveRequestsTable; let acceptedSaveRequestsTable; let rejectedSaveRequestsTable; function enableRowSelection(tableSel) { $(`${tableSel} tbody`).on('click', 'tr', function() { if ($(this).hasClass('selected')) { $(this).removeClass('selected'); } else { $(`${tableSel} tr.selected`).removeClass('selected'); $(this).addClass('selected'); } }); } export function initOriginSaveAdmin() { $(document).ready(() => { $.fn.dataTable.ext.errMode = 'throw'; authorizedOriginTable = $('#swh-authorized-origin-urls').DataTable({ serverSide: true, ajax: Urls.admin_origin_save_authorized_urls_list(), columns: [{data: 'url', name: 'url'}], scrollY: '50vh', scrollCollapse: true, info: false }); enableRowSelection('#swh-authorized-origin-urls'); unauthorizedOriginTable = $('#swh-unauthorized-origin-urls').DataTable({ serverSide: true, ajax: Urls.admin_origin_save_unauthorized_urls_list(), columns: [{data: 'url', name: 'url'}], scrollY: '50vh', scrollCollapse: true, info: false }); enableRowSelection('#swh-unauthorized-origin-urls'); let columnsData = [ + { + data: 'id', + name: 'id', + visible: false, + searchable: false + }, { data: 'save_request_date', name: 'request_date', render: (data, type, row) => { if (type === 'display') { let date = new Date(data); return date.toLocaleString(); } return data; } }, { data: 'origin_type', name: 'origin_type' - }, { data: 'origin_url', name: 'origin_url', render: (data, type, row) => { if (type === 'display') { return `${data}`; } return data; } } ]; pendingSaveRequestsTable = $('#swh-origin-save-pending-requests').DataTable({ serverSide: true, ajax: Urls.browse_origin_save_requests_list('pending'), columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']] }); enableRowSelection('#swh-origin-save-pending-requests'); rejectedSaveRequestsTable = $('#swh-origin-save-rejected-requests').DataTable({ serverSide: true, ajax: Urls.browse_origin_save_requests_list('rejected'), columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']] }); + enableRowSelection('#swh-origin-save-rejected-requests'); columnsData.push({ data: 'save_task_status', name: 'save_task_status', render: (data, type, row) => { if (data === 'succeed') { let browseOriginUrl = Urls.browse_origin(row.origin_url); return `${data}`; } return data; } }); acceptedSaveRequestsTable = $('#swh-origin-save-accepted-requests').DataTable({ serverSide: true, ajax: Urls.browse_origin_save_requests_list('accepted'), columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']] }); + enableRowSelection('#swh-origin-save-accepted-requests'); $('#swh-origin-save-requests-nav-item').on('shown.bs.tab', () => { pendingSaveRequestsTable.draw(); }); $('#swh-origin-save-url-filters-nav-item').on('shown.bs.tab', () => { authorizedOriginTable.draw(); }); $('#swh-authorized-origins-tab').on('shown.bs.tab', () => { authorizedOriginTable.draw(); }); $('#swh-unauthorized-origins-tab').on('shown.bs.tab', () => { unauthorizedOriginTable.draw(); }); $('#swh-save-requests-pending-tab').on('shown.bs.tab', () => { pendingSaveRequestsTable.draw(); }); $('#swh-save-requests-accepted-tab').on('shown.bs.tab', () => { acceptedSaveRequestsTable.draw(); }); $('#swh-save-requests-rejected-tab').on('shown.bs.tab', () => { rejectedSaveRequestsTable.draw(); }); $('#swh-save-requests-pending-tab').click(() => { pendingSaveRequestsTable.ajax.reload(null, false); }); $('#swh-save-requests-accepted-tab').click(() => { acceptedSaveRequestsTable.ajax.reload(null, false); }); $('#swh-save-requests-rejected-tab').click(() => { rejectedSaveRequestsTable.ajax.reload(null, false); }); }); } export function addAuthorizedOriginUrl() { let originUrl = $('#swh-authorized-url-prefix').val(); let addOriginUrl = Urls.admin_origin_save_add_authorized_url(originUrl); csrfPost(addOriginUrl) .then(handleFetchError) .then(() => { authorizedOriginTable.row.add({'url': originUrl}).draw(); }) .catch(response => { swh.webapp.showModalMessage( 'Duplicated origin url prefix', 'The provided origin url prefix is already registered in the authorized list.'); }); } export function removeAuthorizedOriginUrl() { let originUrl = $('#swh-authorized-origin-urls tr.selected').text(); if (originUrl) { let removeOriginUrl = Urls.admin_origin_save_remove_authorized_url(originUrl); csrfPost(removeOriginUrl) .then(handleFetchError) .then(() => { authorizedOriginTable.row('.selected').remove().draw(); }) .catch(() => {}); } } export function addUnauthorizedOriginUrl() { let originUrl = $('#swh-unauthorized-url-prefix').val(); let addOriginUrl = Urls.admin_origin_save_add_unauthorized_url(originUrl); csrfPost(addOriginUrl) .then(handleFetchError) .then(() => { unauthorizedOriginTable.row.add({'url': originUrl}).draw(); }) .catch(() => { swh.webapp.showModalMessage( 'Duplicated origin url prefix', 'The provided origin url prefix is already registered in the unauthorized list.'); }); } export function removeUnauthorizedOriginUrl() { let originUrl = $('#swh-unauthorized-origin-urls tr.selected').text(); if (originUrl) { let removeOriginUrl = Urls.admin_origin_save_remove_unauthorized_url(originUrl); csrfPost(removeOriginUrl) .then(handleFetchError) .then(() => { unauthorizedOriginTable.row('.selected').remove().draw(); }) .catch(() => {}); } } export function acceptOriginSaveRequest() { let selectedRow = pendingSaveRequestsTable.row('.selected'); if (selectedRow.length) { let acceptOriginSaveRequestCallback = () => { let rowData = selectedRow.data(); let acceptSaveRequestUrl = Urls.admin_origin_save_request_accept(rowData['origin_type'], rowData['origin_url']); csrfPost(acceptSaveRequestUrl) .then(() => { pendingSaveRequestsTable.ajax.reload(null, false); }); }; swh.webapp.showModalConfirm( 'Accept origin save request ?', 'Are you sure to accept this origin save request ?', acceptOriginSaveRequestCallback); } } export function rejectOriginSaveRequest() { let selectedRow = pendingSaveRequestsTable.row('.selected'); if (selectedRow.length) { let rejectOriginSaveRequestCallback = () => { let rowData = selectedRow.data(); let rejectSaveRequestUrl = Urls.admin_origin_save_request_reject(rowData['origin_type'], rowData['origin_url']); csrfPost(rejectSaveRequestUrl) .then(() => { pendingSaveRequestsTable.ajax.reload(null, false); }); }; swh.webapp.showModalConfirm( 'Reject origin save request ?', 'Are you sure to reject this origin save request ?', rejectOriginSaveRequestCallback); } } + +function removeOriginSaveRequest(requestTable) { + let selectedRow = requestTable.row('.selected'); + if (selectedRow.length) { + let requestId = selectedRow.data()['id']; + let removeOriginSaveRequestCallback = () => { + let removeSaveRequestUrl = Urls.admin_origin_save_request_remove(requestId); + csrfPost(removeSaveRequestUrl) + .then(() => { + requestTable.ajax.reload(null, false); + }); + }; + + swh.webapp.showModalConfirm( + 'Remove origin save request ?', + 'Are you sure to remove this origin save request ?', + removeOriginSaveRequestCallback); + } +} + +export function removePendingOriginSaveRequest() { + removeOriginSaveRequest(pendingSaveRequestsTable); +} + +export function removeAcceptedOriginSaveRequest() { + removeOriginSaveRequest(acceptedSaveRequestsTable); +} + +export function removeRejectedOriginSaveRequest() { + removeOriginSaveRequest(rejectedSaveRequestsTable); +} diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py index 96e68cd5..2c264516 100644 --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -1,397 +1,398 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from bisect import bisect_right from datetime import datetime, timezone from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ValidationError from django.core.validators import URLValidator from swh.web import config from swh.web.common import service from swh.web.common.exc import BadInputExc, ForbiddenExc from swh.web.common.models import ( SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING ) from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import parse_timestamp from swh.scheduler.utils import create_oneshot_task_dict scheduler = config.scheduler() def get_origin_save_authorized_urls(): """ Get the list of origin url prefixes authorized to be immediately loaded into the archive (whitelist). Returns: list: The list of authorized origin url prefix """ return [origin.url for origin in SaveAuthorizedOrigin.objects.all()] def get_origin_save_unauthorized_urls(): """ Get the list of origin url prefixes forbidden to be loaded into the archive (blacklist). Returns: list: the list of unauthorized origin url prefix """ return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] def can_save_origin(origin_url): """ Check if a software origin can be saved into the archive. Based on the origin url, the save request will be either: * immediately accepted if the url is whitelisted * rejected if the url is blacklisted * put in pending state for manual review otherwise Args: origin_url (str): the software origin url to check Returns: str: the origin save request status, either **accepted**, **rejected** or **pending** """ # origin url may be blacklisted for url_prefix in get_origin_save_unauthorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_REJECTED # if the origin url is in the white list, it can be immediately saved for url_prefix in get_origin_save_authorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_ACCEPTED # otherwise, the origin url needs to be manually verified return SAVE_REQUEST_PENDING # map origin type to scheduler task # TODO: do not hardcode the task name here # TODO: unlock hg and svn loading once the scheduler # loading tasks are available in production _origin_type_task = { 'git': 'origin-update-git', # 'hg': 'origin-load-hg', # 'svn': 'origin-load-svn' } # map scheduler task status to origin save status _save_task_status = { 'next_run_not_scheduled': SAVE_TASK_NOT_YET_SCHEDULED, 'next_run_scheduled': SAVE_TASK_SCHEDULED, 'completed': SAVE_TASK_SUCCEED, 'disabled': SAVE_TASK_FAILED } def get_savable_origin_types(): return sorted(list(_origin_type_task.keys())) def _check_origin_type_savable(origin_type): """ Get the list of software origin types that can be loaded through a save request. Returns: list: the list of saveable origin types """ allowed_origin_types = ', '.join(get_savable_origin_types()) if origin_type not in _origin_type_task: raise BadInputExc('Origin of type %s can not be saved! ' 'Allowed types are the following: %s' % (origin_type, allowed_origin_types)) _validate_url = URLValidator(schemes=['http', 'https', 'svn', 'git']) def _check_origin_url_valid(origin_url): try: _validate_url(origin_url) except ValidationError: raise BadInputExc('The provided origin url (%s) is not valid!' % origin_url) def _get_visit_info_for_save_request(save_request): visit_date = None visit_status = None try: origin = {'type': save_request.origin_type, 'url': save_request.origin_url} origin_info = service.lookup_origin(origin) origin_visits = get_origin_visits(origin_info) visit_dates = [parse_timestamp(v['date']) for v in origin_visits] i = bisect_right(visit_dates, save_request.request_date) if i != len(visit_dates): visit_date = visit_dates[i] visit_status = origin_visits[i]['status'] if origin_visits[i]['status'] == 'ongoing': visit_date = None except Exception: pass return visit_date, visit_status def _check_visit_update_status(save_request, save_task_status): visit_date, visit_status = _get_visit_info_for_save_request(save_request) save_request.visit_date = visit_date # visit has been performed, mark the saving task as succeed if visit_date and visit_status is not None: save_task_status = SAVE_TASK_SUCCEED elif visit_status == 'ongoing': save_task_status = SAVE_TASK_RUNNING else: time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # consider the task as failed if it is still in scheduled state # 30 days after its submission if time_delta.days > 30: save_task_status = SAVE_TASK_FAILED return visit_date, save_task_status def _save_request_dict(save_request, task=None): must_save = False visit_date = save_request.visit_date # save task still in scheduler db if task: save_task_status = _save_task_status[task['status']] if save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEED) \ and not visit_date: visit_date, _ = _get_visit_info_for_save_request(save_request) save_request.visit_date = visit_date must_save = True # Ensure last origin visit is available in database # before reporting the task execution as successful if save_task_status == SAVE_TASK_SUCCEED and not visit_date: save_task_status = SAVE_TASK_SCHEDULED # Check tasks still marked as scheduled / not yet scheduled if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): visit_date, save_task_status = _check_visit_update_status( save_request, save_task_status) # save task may have been archived else: save_task_status = save_request.loading_task_status if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): visit_date, save_task_status = _check_visit_update_status( save_request, save_task_status) else: save_task_status = save_request.loading_task_status if save_request.loading_task_status != save_task_status: save_request.loading_task_status = save_task_status must_save = True if must_save: save_request.save() - return {'origin_type': save_request.origin_type, + return {'id': save_request.id, + 'origin_type': save_request.origin_type, 'origin_url': save_request.origin_url, 'save_request_date': save_request.request_date.isoformat(), 'save_request_status': save_request.status, 'save_task_status': save_task_status, 'visit_date': visit_date.isoformat() if visit_date else None} def create_save_origin_request(origin_type, origin_url): """ Create a loading task to save a software origin into the archive. This function aims to create a software origin loading task trough the use of the swh-scheduler component. First, some checks are performed to see if the origin type and url are valid but also if the the save request can be accepted. If those checks passed, the loading task is then created. Otherwise, the save request is put in pending or rejected state. All the submitted save requests are logged into the swh-web database to keep track of them. Args: origin_type (str): the type of origin to save (currently only ``git`` but ``svn`` and ``hg`` will soon be available) origin_url (str): the url of the origin to save Raises: BadInputExc: the origin type or url is invalid ForbiddenExc: the provided origin url is blacklisted Returns: dict: A dict describing the save request with the following keys: * **origin_type**: the type of the origin to save * **origin_url**: the url of the origin * **save_request_date**: the date the request was submitted * **save_request_status**: the request status, either **accepted**, **rejected** or **pending** * **save_task_status**: the origin loading task status, either **not created**, **not yet scheduled**, **scheduled**, **succeed** or **failed** """ _check_origin_type_savable(origin_type) _check_origin_url_valid(origin_url) save_request_status = can_save_origin(origin_url) task = None # if the origin save request is accepted, create a scheduler # task to load it into the archive if save_request_status == SAVE_REQUEST_ACCEPTED: # create a task with high priority kwargs = {'priority': 'high'} # set task parameters according to the origin type if origin_type == 'git': kwargs['repo_url'] = origin_url elif origin_type == 'hg': kwargs['origin_url'] = origin_url elif origin_type == 'svn': kwargs['origin_url'] = origin_url kwargs['svn_url'] = origin_url sor = None # get list of previously sumitted save requests current_sors = \ list(SaveOriginRequest.objects.filter(origin_type=origin_type, origin_url=origin_url)) can_create_task = False # if no save requests previously submitted, create the scheduler task if not current_sors: can_create_task = True else: # get the latest submitted save request sor = current_sors[0] # if it was in pending state, we need to create the scheduler task # and update the save request info in the database if sor.status == SAVE_REQUEST_PENDING: can_create_task = True # a task has already been created to load the origin elif sor.loading_task_id != -1: # get the scheduler task and its status tasks = scheduler.get_tasks([sor.loading_task_id]) task = tasks[0] if tasks else None task_status = _save_request_dict(sor, task)['save_task_status'] # create a new scheduler task only if the previous one has been # already executed if task_status == SAVE_TASK_FAILED or \ task_status == SAVE_TASK_SUCCEED: can_create_task = True sor = None else: can_create_task = False if can_create_task: # effectively create the scheduler task task_dict = create_oneshot_task_dict( _origin_type_task[origin_type], **kwargs) task = scheduler.create_tasks([task_dict])[0] # pending save request has been accepted if sor: sor.status = SAVE_REQUEST_ACCEPTED sor.loading_task_id = task['id'] sor.save() else: sor = SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status, # noqa loading_task_id=task['id']) # noqa # save request must be manually reviewed for acceptation elif save_request_status == SAVE_REQUEST_PENDING: # check if there is already such a save request already submitted, # no need to add it to the database in that case try: sor = SaveOriginRequest.objects.get(origin_type=origin_type, origin_url=origin_url, status=save_request_status) # if not add it to the database except ObjectDoesNotExist: sor = SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status) # origin can not be saved as its url is blacklisted, # log the request to the database anyway else: sor = SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status) if save_request_status == SAVE_REQUEST_REJECTED: raise ForbiddenExc('The origin url is blacklisted and will not be ' 'loaded into the archive.') return _save_request_dict(sor, task) def get_save_origin_requests_from_queryset(requests_queryset): """ Get all save requests from a SaveOriginRequest queryset. Args: requests_queryset (django.db.models.QuerySet): input SaveOriginRequest queryset Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ task_ids = [] for sor in requests_queryset: task_ids.append(sor.loading_task_id) requests = [] if task_ids: tasks = scheduler.get_tasks(task_ids) tasks = {task['id']: task for task in tasks} for sor in requests_queryset: sr_dict = _save_request_dict(sor, tasks.get(sor.loading_task_id)) requests.append(sr_dict) return requests def get_save_origin_requests(origin_type, origin_url): """ Get all save requests for a given software origin. Args: origin_type (str): the type of the origin origin_url (str): the url of the origin Raises: BadInputExc: the origin type or url is invalid Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ _check_origin_type_savable(origin_type) _check_origin_url_valid(origin_url) sors = SaveOriginRequest.objects.filter(origin_type=origin_type, origin_url=origin_url) return get_save_origin_requests_from_queryset(sors) diff --git a/swh/web/templates/admin/origin-save.html b/swh/web/templates/admin/origin-save.html index 4c5b2ead..59ed1394 100644 --- a/swh/web/templates/admin/origin-save.html +++ b/swh/web/templates/admin/origin-save.html @@ -1,159 +1,179 @@ {% extends "layout.html" %} {% comment %} Copyright (C) 2018 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load swh_templatetags %} {% load render_bundle from webpack_loader %} {% block header %} {{ block.super }} {% render_bundle 'admin' %} {% endblock %} {% block title %} Save origin administration {% endblock %} {% block navbar-content %}

Save origin administration

{% endblock %} {% block content %}
+
Request date Origin type Origin url
-
+
+
+ +
+
Request date Origin type Origin url Save task status
+
+
+
+
+ +
+
+
Request date Origin type Origin url
+
+
+
+
+ +
+
Url
Url
{% endblock %} diff --git a/swh/web/tests/admin/test_origin_save.py b/swh/web/tests/admin/test_origin_save.py index 11997ac7..0338c3c0 100644 --- a/swh/web/tests/admin/test_origin_save.py +++ b/swh/web/tests/admin/test_origin_save.py @@ -1,216 +1,232 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from urllib.parse import unquote from django.contrib.auth import get_user_model from unittest.mock import patch from swh.web.common.models import ( - SaveAuthorizedOrigin, SaveUnauthorizedOrigin + SaveAuthorizedOrigin, SaveUnauthorizedOrigin, SaveOriginRequest ) from swh.web.common.origin_save import can_save_origin from swh.web.common.models import ( SAVE_REQUEST_PENDING, SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_TASK_NOT_YET_SCHEDULED ) from swh.web.common.utils import reverse from swh.web.tests.testcase import WebTestCase _user_name = 'swh-web-admin' _user_mail = 'admin@swh-web.org' _user_password = '..34~pounds~BEAUTY~march~63..' _authorized_origin_url = 'https://scm.ourproject.org/anonscm/' _unauthorized_origin_url = 'https://www.softwareheritage.org/' class OriginSaveAdminTestCase(WebTestCase): @classmethod def setUpTestData(cls): # noqa: N802 User = get_user_model() # noqa: N806 user = User.objects.create_user(_user_name, _user_mail, _user_password) user.is_staff = True user.save() SaveAuthorizedOrigin.objects.create(url=_authorized_origin_url) SaveUnauthorizedOrigin.objects.create(url=_unauthorized_origin_url) def check_not_login(self, url): login_url = reverse('login', query_params={'next': url}) response = self.client.post(url) self.assertEqual(response.status_code, 302) self.assertEqual(unquote(response.url), login_url) def test_add_authorized_origin_url(self): authorized_url = 'https://scm.adullact.net/anonscm/' self.assertEqual(can_save_origin(authorized_url), SAVE_REQUEST_PENDING) url = reverse('admin-origin-save-add-authorized-url', url_args={'origin_url': authorized_url}) self.check_not_login(url) self.assertEqual(can_save_origin(authorized_url), SAVE_REQUEST_PENDING) self.client.login(username=_user_name, password=_user_password) response = self.client.post(url) self.assertEqual(response.status_code, 200) self.assertEqual(can_save_origin(authorized_url), SAVE_REQUEST_ACCEPTED) def test_remove_authorized_origin_url(self): self.assertEqual(can_save_origin(_authorized_origin_url), SAVE_REQUEST_ACCEPTED) url = reverse('admin-origin-save-remove-authorized-url', url_args={'origin_url': _authorized_origin_url}) self.check_not_login(url) self.assertEqual(can_save_origin(_authorized_origin_url), SAVE_REQUEST_ACCEPTED) self.client.login(username=_user_name, password=_user_password) response = self.client.post(url) self.assertEqual(response.status_code, 200) self.assertEqual(can_save_origin(_authorized_origin_url), SAVE_REQUEST_PENDING) def test_add_unauthorized_origin_url(self): unauthorized_url = 'https://www.yahoo./' self.assertEqual(can_save_origin(unauthorized_url), SAVE_REQUEST_PENDING) url = reverse('admin-origin-save-add-unauthorized-url', url_args={'origin_url': unauthorized_url}) self.check_not_login(url) self.assertEqual(can_save_origin(unauthorized_url), SAVE_REQUEST_PENDING) self.client.login(username=_user_name, password=_user_password) response = self.client.post(url) self.assertEqual(response.status_code, 200) self.assertEqual(can_save_origin(unauthorized_url), SAVE_REQUEST_REJECTED) def test_remove_unauthorized_origin_url(self): self.assertEqual(can_save_origin(_unauthorized_origin_url), SAVE_REQUEST_REJECTED) url = reverse('admin-origin-save-remove-unauthorized-url', url_args={'origin_url': _unauthorized_origin_url}) self.check_not_login(url) self.assertEqual(can_save_origin(_unauthorized_origin_url), SAVE_REQUEST_REJECTED) self.client.login(username=_user_name, password=_user_password) response = self.client.post(url) self.assertEqual(response.status_code, 200) self.assertEqual(can_save_origin(_unauthorized_origin_url), SAVE_REQUEST_PENDING) @patch('swh.web.common.origin_save.scheduler') def test_accept_pending_save_request(self, mock_scheduler): origin_type = 'git' origin_url = 'https://v2.pikacode.com/bthate/botlib.git' save_request_url = reverse('api-save-origin', url_args={'origin_type': origin_type, 'origin_url': origin_url}) response = self.client.post(save_request_url, data={}, content_type='application/x-www-form-urlencoded') # noqa self.assertEqual(response.status_code, 200) self.assertEqual(response.data['save_request_status'], SAVE_REQUEST_PENDING) accept_request_url = reverse('admin-origin-save-request-accept', url_args={'origin_type': origin_type, 'origin_url': origin_url}) self.check_not_login(accept_request_url) tasks_data = [ { 'priority': 'high', 'policy': 'oneshot', 'type': 'origin-update-git', 'arguments': { 'kwargs': { 'repo_url': origin_url }, 'args': [] }, 'status': 'next_run_not_scheduled', 'id': 1, } ] mock_scheduler.create_tasks.return_value = tasks_data mock_scheduler.get_tasks.return_value = tasks_data self.client.login(username=_user_name, password=_user_password) response = self.client.post(accept_request_url) self.assertEqual(response.status_code, 200) response = self.client.get(save_request_url) self.assertEqual(response.status_code, 200) self.assertEqual(response.data[0]['save_request_status'], SAVE_REQUEST_ACCEPTED) self.assertEqual(response.data[0]['save_task_status'], SAVE_TASK_NOT_YET_SCHEDULED) @patch('swh.web.common.origin_save.scheduler') def test_reject_pending_save_request(self, mock_scheduler): origin_type = 'git' origin_url = 'https://wikipedia.com' save_request_url = reverse('api-save-origin', url_args={'origin_type': origin_type, 'origin_url': origin_url}) response = self.client.post(save_request_url, data={}, content_type='application/x-www-form-urlencoded') # noqa self.assertEqual(response.status_code, 200) self.assertEqual(response.data['save_request_status'], SAVE_REQUEST_PENDING) reject_request_url = reverse('admin-origin-save-request-reject', url_args={'origin_type': origin_type, 'origin_url': origin_url}) self.check_not_login(reject_request_url) self.client.login(username=_user_name, password=_user_password) response = self.client.post(reject_request_url) self.assertEqual(response.status_code, 200) tasks_data = [ { 'priority': 'high', 'policy': 'oneshot', 'type': 'origin-update-git', 'arguments': { 'kwargs': { 'repo_url': origin_url }, 'args': [] }, 'status': 'next_run_not_scheduled', 'id': 1, } ] mock_scheduler.create_tasks.return_value = tasks_data mock_scheduler.get_tasks.return_value = tasks_data response = self.client.get(save_request_url) self.assertEqual(response.status_code, 200) self.assertEqual(response.data[0]['save_request_status'], SAVE_REQUEST_REJECTED) + + def test_remove_save_request(self): + sor = SaveOriginRequest.objects.create(origin_type='git', + origin_url='https://wikipedia.com', # noqa + status=SAVE_REQUEST_PENDING) + self.assertEqual(SaveOriginRequest.objects.count(), 1) + + remove_request_url = reverse('admin-origin-save-request-remove', + url_args={'sor_id': sor.id}) + + self.check_not_login(remove_request_url) + + self.client.login(username=_user_name, password=_user_password) + response = self.client.post(remove_request_url) + self.assertEqual(response.status_code, 200) + self.assertEqual(SaveOriginRequest.objects.count(), 0)