diff --git a/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py b/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py index b347cb9a3..0847505ed 100644 --- a/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py +++ b/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py @@ -1,20 +1,23 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.10.7 on 2018-09-11 14:53 +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ('swh.web.common', '0001_initial'), ] operations = [ migrations.AddField( model_name='saveoriginrequest', name='visit_date', field=models.DateTimeField(null=True), ), ] diff --git a/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py b/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py new file mode 100644 index 000000000..5b32752a7 --- /dev/null +++ b/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py @@ -0,0 +1,44 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +# flake8: noqa + +from __future__ import unicode_literals + +from django.db import migrations, models + +from swh.web.config import scheduler + +def _remove_archived_tasks_with_no_saved_status(apps, schema_editor): + """ + Scheduler tasks are archived on a regular basis so their completion + state could not be known anymore as previous to this migration, + the loading task status was not stored in the database. + So remove the rows associated to already archived tasks as + the loading status can not be retrieved anymore. + """ + SaveOriginRequest = apps.get_model('swh.web.common', 'SaveOriginRequest') + no_saved_status_tasks = [] + for sor in SaveOriginRequest.objects.all(): + tasks = scheduler().get_tasks([sor.loading_task_id]) + if not tasks: + no_saved_status_tasks.append(sor.loading_task_id) + SaveOriginRequest.objects.filter(loading_task_id__in=no_saved_status_tasks).delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('swh.web.common', '0002_saveoriginrequest_visit_date'), + ] + + operations = [ + migrations.AddField( + model_name='saveoriginrequest', + name='loading_task_status', + field=models.TextField(choices=[('not created', 'not created'), ('not yet scheduled', 'not yet scheduled'), ('scheduled', 'scheduled'), ('succeed', 'succeed'), ('failed', 'failed')], default='not created'), + ), + migrations.RunPython(_remove_archived_tasks_with_no_saved_status) + ] diff --git a/swh/web/common/models.py b/swh/web/common/models.py index 3852bc8b1..297c322be 100644 --- a/swh/web/common/models.py +++ b/swh/web/common/models.py @@ -1,74 +1,90 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.db import models class SaveAuthorizedOrigin(models.Model): """ Model table holding origin urls authorized to be loaded into the archive. """ url = models.CharField(max_length=200, null=False) class Meta: app_label = 'swh.web.common' db_table = 'save_authorized_origin' def __str__(self): return self.url class SaveUnauthorizedOrigin(models.Model): """ Model table holding origin urls not authorized to be loaded into the archive. """ url = models.CharField(max_length=200, null=False) class Meta: app_label = 'swh.web.common' db_table = 'save_unauthorized_origin' def __str__(self): return self.url SAVE_REQUEST_ACCEPTED = 'accepted' SAVE_REQUEST_REJECTED = 'rejected' SAVE_REQUEST_PENDING = 'pending' SAVE_REQUEST_STATUS = [ (SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_ACCEPTED), (SAVE_REQUEST_REJECTED, SAVE_REQUEST_REJECTED), (SAVE_REQUEST_PENDING, SAVE_REQUEST_PENDING) ] +SAVE_TASK_NOT_CREATED = 'not created' +SAVE_TASK_NOT_YET_SCHEDULED = 'not yet scheduled' +SAVE_TASK_SCHEDULED = 'scheduled' +SAVE_TASK_SUCCEED = 'succeed' +SAVE_TASK_FAILED = 'failed' + +SAVE_TASK_STATUS = [ + (SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_CREATED), + (SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED), + (SAVE_TASK_SCHEDULED, SAVE_TASK_SCHEDULED), + (SAVE_TASK_SUCCEED, SAVE_TASK_SUCCEED), + (SAVE_TASK_FAILED, SAVE_TASK_FAILED) +] + class SaveOriginRequest(models.Model): """ Model table holding all the save origin requests issued by users. """ id = models.BigAutoField(primary_key=True) request_date = models.DateTimeField(auto_now_add=True) origin_type = models.CharField(max_length=200, null=False) origin_url = models.CharField(max_length=200, null=False) status = models.TextField(choices=SAVE_REQUEST_STATUS, default=SAVE_REQUEST_PENDING) loading_task_id = models.IntegerField(default=-1) visit_date = models.DateTimeField(null=True) + loading_task_status = models.TextField(choices=SAVE_TASK_STATUS, + default=SAVE_TASK_NOT_CREATED) class Meta: app_label = 'swh.web.common' db_table = 'save_origin_request' ordering = ['-id'] def __str__(self): return str({'id': self.id, 'request_date': self.request_date, 'origin_type': self.origin_type, 'origin_url': self.origin_url, 'status': self.status, 'loading_task_id': self.loading_task_id, 'visit_date': self.visit_date}) diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py index 2b2298566..87f7278c1 100644 --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -1,351 +1,349 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from bisect import bisect_right from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ValidationError from django.core.validators import URLValidator from swh.web import config from swh.web.common import service from swh.web.common.exc import BadInputExc, ForbiddenExc from swh.web.common.models import ( SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, - SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING + SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, + SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, + SAVE_TASK_SUCCEED, SAVE_TASK_FAILED ) from swh.web.common.utils import get_origin_visits, parse_timestamp from swh.scheduler.utils import create_oneshot_task_dict scheduler = config.scheduler() def get_origin_save_authorized_urls(): """ Get the list of origin url prefixes authorized to be immediately loaded into the archive (whitelist). Returns: list: The list of authorized origin url prefix """ return [origin.url for origin in SaveAuthorizedOrigin.objects.all()] def get_origin_save_unauthorized_urls(): """ Get the list of origin url prefixes forbidden to be loaded into the archive (blacklist). Returns: list: the list of unauthorized origin url prefix """ return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] def can_save_origin(origin_url): """ Check if a software origin can be saved into the archive. Based on the origin url, the save request will be either: * immediately accepted if the url is whitelisted * rejected if the url is blacklisted * put in pending state for manual review otherwise Args: origin_url (str): the software origin url to check Returns: str: the origin save request status, either *accepted*, *rejected* or *pending* """ # origin url may be blacklisted for url_prefix in get_origin_save_unauthorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_REJECTED # if the origin url is in the white list, it can be immediately saved for url_prefix in get_origin_save_authorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_ACCEPTED # otherwise, the origin url needs to be manually verified return SAVE_REQUEST_PENDING # map origin type to scheduler task # TODO: do not hardcode the task name here # TODO: unlock hg and svn loading once the scheduler # loading tasks are available in production _origin_type_task = { 'git': 'origin-update-git', # 'hg': 'origin-load-hg', # 'svn': 'origin-load-svn' } -SAVE_TASK_NOT_CREATED = 'not created' -SAVE_TASK_NOT_YET_SCHEDULED = 'not yet scheduled' -SAVE_TASK_SCHEDULED = 'scheduled' -SAVE_TASK_SUCCEED = 'succeed' -SAVE_TASK_FAILED = 'failed' # map scheduler task status to origin save status _save_task_status = { 'next_run_not_scheduled': SAVE_TASK_NOT_YET_SCHEDULED, 'next_run_scheduled': SAVE_TASK_SCHEDULED, 'completed': SAVE_TASK_SUCCEED, 'disabled': SAVE_TASK_FAILED } def get_savable_origin_types(): return sorted(list(_origin_type_task.keys())) def _check_origin_type_savable(origin_type): """ Get the list of software origin types that can be loaded through a save request. Returns: list: the list of savable origin types """ allowed_origin_types = ', '.join(get_savable_origin_types()) if origin_type not in _origin_type_task: raise BadInputExc('Origin of type %s can not be saved! ' 'Allowed types are the following: %s' % (origin_type, allowed_origin_types)) _validate_url = URLValidator(schemes=['http', 'https', 'svn', 'git']) def _check_origin_url_valid(origin_url): try: _validate_url(origin_url) except ValidationError: raise BadInputExc('The provided origin url (%s) is not valid!' % origin_url) def _get_visit_date_for_save_request(save_request): visit_date = None try: origin = {'type': save_request.origin_type, 'url': save_request.origin_url} origin_info = service.lookup_origin(origin) origin_visits = get_origin_visits(origin_info) visit_dates = [parse_timestamp(v['date']) for v in origin_visits] i = bisect_right(visit_dates, save_request.request_date) if i != len(visit_dates): save_request.visit_date = visit_dates[i] save_request.save() visit_date = visit_dates[i] except Exception: pass return visit_date def _save_request_dict(save_request, task=None): - save_task_status = SAVE_TASK_NOT_CREATED visit_date = save_request.visit_date if task: save_task_status = _save_task_status[task['status']] if save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEED) \ and not visit_date: visit_date = _get_visit_date_for_save_request(save_request) # Ensure last origin visit is available in database # before reporting the task execution as successful if save_task_status == SAVE_TASK_SUCCEED and not visit_date: save_task_status = SAVE_TASK_SCHEDULED + save_request.loading_task_status = save_task_status + save_request.save() + else: + save_task_status = save_request.loading_task_status return {'origin_type': save_request.origin_type, 'origin_url': save_request.origin_url, 'save_request_date': save_request.request_date.isoformat(), 'save_request_status': save_request.status, 'save_task_status': save_task_status, 'visit_date': visit_date.isoformat() if visit_date else None} def create_save_origin_request(origin_type, origin_url): """ Create a loading task to save a software origin into the archive. This function aims to create a software origin loading task trough the use of the swh-scheduler component. First, some checks are performed to see if the origin type and url are valid but also if the the save request can be accepted. If those checks passed, the loading task is then created. Otherwise, the save request is put in pending or rejected state. All the submitted save requests are logged into the swh-web database to keep track of them. Args: origin_type (str): the type of origin to save (*git*, *hg*, *svn*, ...) origin_url (str): the url of the origin to save Raises: BadInputExc: the origin type or url is invalid ForbiddenExc: the provided origin url is blacklisted Returns: dict: A dict describing the save request with the following keys: * **origin_type**: the type of the origin to save * **origin_url**: the url of the origin * **save_request_date**: the date the request was submitted * **save_request_status**: the request status, either *accepted*, *rejected* or *pending* * **save_task_status**: the origin loading task status, either *not created*, *not yet scheduled*, *scheduled*, *succeed* or *failed* """ _check_origin_type_savable(origin_type) _check_origin_url_valid(origin_url) save_request_status = can_save_origin(origin_url) task = None # if the origin save request is accepted, create a scheduler # task to load it into the archive if save_request_status == SAVE_REQUEST_ACCEPTED: # create a task with high priority kwargs = {'priority': 'high'} # set task parameters according to the origin type if origin_type == 'git': kwargs['repo_url'] = origin_url elif origin_type == 'hg': kwargs['origin_url'] = origin_url elif origin_type == 'svn': kwargs['origin_url'] = origin_url kwargs['svn_url'] = origin_url sor = None # get list of previously sumitted save requests current_sors = \ list(SaveOriginRequest.objects.filter(origin_type=origin_type, origin_url=origin_url)) can_create_task = False # if no save requests previously submitted, create the scheduler task if not current_sors: can_create_task = True else: # get the latest submitted save request sor = current_sors[0] # if it was in pending state, we need to create the scheduler task # and update the save request info in the database if sor.status == SAVE_REQUEST_PENDING: can_create_task = True # a task has already been created to load the origin elif sor.loading_task_id != -1: # get the scheduler task and its status task = scheduler.get_tasks([sor.loading_task_id])[0] save_task_status = _save_task_status[task['status']] # create a new scheduler task only if the previous one has been # already executed if save_task_status == SAVE_TASK_FAILED or \ save_task_status == SAVE_TASK_SUCCEED: can_create_task = True sor = None else: can_create_task = False if can_create_task: # effectively create the scheduler task task_dict = create_oneshot_task_dict( _origin_type_task[origin_type], **kwargs) task = scheduler.create_tasks([task_dict])[0] # pending save request has been accepted if sor: sor.status = SAVE_REQUEST_ACCEPTED sor.loading_task_id = task['id'] sor.save() else: sor = SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status, # noqa loading_task_id=task['id']) # noqa # save request must be manually reviewed for acceptation elif save_request_status == SAVE_REQUEST_PENDING: # check if there is already such a save request already submitted, # no need to add it to the database in that case try: sor = SaveOriginRequest.objects.get(origin_type=origin_type, origin_url=origin_url, status=save_request_status) # if not add it to the database except ObjectDoesNotExist: sor = SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status) # origin can not be saved as its url is blacklisted, # log the request to the database anyway else: sor = SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status) if save_request_status == SAVE_REQUEST_REJECTED: raise ForbiddenExc('The origin url is blacklisted and will not be ' 'loaded into the archive.') return _save_request_dict(sor, task) def get_save_origin_requests_from_queryset(requests_queryset): """ Get all save requests from a SaveOriginRequest queryset. Args: requests_queryset (django.db.models.QuerySet): input SaveOriginRequest queryset Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ requests = [] for sor in requests_queryset: - # rejected saving task or pending for acceptation - if sor.loading_task_id == -1: - requests.append(_save_request_dict(sor)) - continue - task = scheduler.get_tasks([sor.loading_task_id]) - # loading task may have been archived, do not return - # save request info in that case - if task: - requests.append(_save_request_dict(sor, task[0])) + task = None + # save task has not been created, no need to query + # the scheduler API + if sor.loading_task_id != -1: + tasks = scheduler.get_tasks([sor.loading_task_id]) + task = tasks[0] if tasks else None + requests.append(_save_request_dict(sor, task)) return requests def get_save_origin_requests(origin_type, origin_url): """ Get all save requests for a given software origin. Args: origin_type (str): the type of the origin origin_url (str): the url of the origin Raises: BadInputExc: the origin type or url is invalid Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ _check_origin_type_savable(origin_type) _check_origin_url_valid(origin_url) sors = SaveOriginRequest.objects.filter(origin_type=origin_type, origin_url=origin_url) return get_save_origin_requests_from_queryset(sors) diff --git a/swh/web/tests/admin/test_origin_save.py b/swh/web/tests/admin/test_origin_save.py index 55e80a7cc..ee07005a0 100644 --- a/swh/web/tests/admin/test_origin_save.py +++ b/swh/web/tests/admin/test_origin_save.py @@ -1,203 +1,204 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from urllib.parse import unquote from django.contrib.auth import get_user_model from nose.tools import istest, nottest from unittest.mock import patch from swh.web.common.models import ( SaveAuthorizedOrigin, SaveUnauthorizedOrigin ) -from swh.web.common.origin_save import ( - can_save_origin, SAVE_REQUEST_PENDING, SAVE_REQUEST_ACCEPTED, +from swh.web.common.origin_save import can_save_origin +from swh.web.common.models import ( + SAVE_REQUEST_PENDING, SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_TASK_NOT_YET_SCHEDULED ) from swh.web.common.utils import reverse from swh.web.tests.testcase import SWHWebTestCase _user_name = 'swh-web-admin' _user_mail = 'admin@swh-web.org' _user_password = '..34~pounds~BEAUTY~march~63..' _authorized_origin_url = 'https://scm.ourproject.org/anonscm/' _unauthorized_origin_url = 'https://www.softwareheritage.org/' class OriginSaveAdminTestCase(SWHWebTestCase): @classmethod def setUpTestData(cls): User = get_user_model() user = User.objects.create_user(_user_name, _user_mail, _user_password) user.is_staff = True user.save() SaveAuthorizedOrigin.objects.create(url=_authorized_origin_url) SaveUnauthorizedOrigin.objects.create(url=_unauthorized_origin_url) @nottest def check_not_login(self, url): login_url = reverse('login', query_params={'next': url}) response = self.client.post(url) self.assertEquals(response.status_code, 302) self.assertEquals(unquote(response.url), login_url) @istest def test_add_authorized_origin_url(self): authorized_url = 'https://scm.adullact.net/anonscm/' self.assertEquals(can_save_origin(authorized_url), SAVE_REQUEST_PENDING) url = reverse('admin-origin-save-add-authorized-url', kwargs={'origin_url': authorized_url}) self.check_not_login(url) self.assertEquals(can_save_origin(authorized_url), SAVE_REQUEST_PENDING) self.client.login(username=_user_name, password=_user_password) response = self.client.post(url) self.assertEquals(response.status_code, 200) self.assertEquals(can_save_origin(authorized_url), SAVE_REQUEST_ACCEPTED) @istest def test_remove_authorized_origin_url(self): self.assertEquals(can_save_origin(_authorized_origin_url), SAVE_REQUEST_ACCEPTED) url = reverse('admin-origin-save-remove-authorized-url', kwargs={'origin_url': _authorized_origin_url}) self.check_not_login(url) self.assertEquals(can_save_origin(_authorized_origin_url), SAVE_REQUEST_ACCEPTED) self.client.login(username=_user_name, password=_user_password) response = self.client.post(url) self.assertEquals(response.status_code, 200) self.assertEquals(can_save_origin(_authorized_origin_url), SAVE_REQUEST_PENDING) @istest def test_add_unauthorized_origin_url(self): unauthorized_url = 'https://www.yahoo./' self.assertEquals(can_save_origin(unauthorized_url), SAVE_REQUEST_PENDING) url = reverse('admin-origin-save-add-unauthorized-url', kwargs={'origin_url': unauthorized_url}) self.check_not_login(url) self.assertEquals(can_save_origin(unauthorized_url), SAVE_REQUEST_PENDING) self.client.login(username=_user_name, password=_user_password) response = self.client.post(url) self.assertEquals(response.status_code, 200) self.assertEquals(can_save_origin(unauthorized_url), SAVE_REQUEST_REJECTED) @istest def test_remove_unauthorized_origin_url(self): self.assertEquals(can_save_origin(_unauthorized_origin_url), SAVE_REQUEST_REJECTED) url = reverse('admin-origin-save-remove-unauthorized-url', kwargs={'origin_url': _unauthorized_origin_url}) self.check_not_login(url) self.assertEquals(can_save_origin(_unauthorized_origin_url), SAVE_REQUEST_REJECTED) self.client.login(username=_user_name, password=_user_password) response = self.client.post(url) self.assertEquals(response.status_code, 200) self.assertEquals(can_save_origin(_unauthorized_origin_url), SAVE_REQUEST_PENDING) @istest @patch('swh.web.common.origin_save.scheduler') def test_accept_pending_save_request(self, mock_scheduler): origin_type = 'git' origin_url = 'https://v2.pikacode.com/bthate/botlib.git' save_request_url = reverse('save-origin', kwargs={'origin_type': origin_type, 'origin_url': origin_url}) response = self.client.post(save_request_url, data={}, content_type='application/x-www-form-urlencoded') # noqa self.assertEquals(response.status_code, 200) self.assertEquals(response.data['save_request_status'], SAVE_REQUEST_PENDING) accept_request_url = reverse('admin-origin-save-request-accept', kwargs={'origin_type': origin_type, 'origin_url': origin_url}) self.check_not_login(accept_request_url) tasks_data = [ { 'priority': 'high', 'policy': 'oneshot', 'type': 'origin-update-git', 'arguments': { 'kwargs': { 'repo_url': origin_url }, 'args': [] }, 'status': 'next_run_not_scheduled', 'id': 1, } ] mock_scheduler.create_tasks.return_value = tasks_data mock_scheduler.get_tasks.return_value = tasks_data self.client.login(username=_user_name, password=_user_password) response = self.client.post(accept_request_url) self.assertEquals(response.status_code, 200) response = self.client.get(save_request_url) self.assertEquals(response.status_code, 200) self.assertEquals(response.data[0]['save_request_status'], SAVE_REQUEST_ACCEPTED) self.assertEquals(response.data[0]['save_task_status'], SAVE_TASK_NOT_YET_SCHEDULED) @istest def test_reject_pending_save_request(self): origin_type = 'git' origin_url = 'https://wikipedia.com' save_request_url = reverse('save-origin', kwargs={'origin_type': origin_type, 'origin_url': origin_url}) response = self.client.post(save_request_url, data={}, content_type='application/x-www-form-urlencoded') # noqa self.assertEquals(response.status_code, 200) self.assertEquals(response.data['save_request_status'], SAVE_REQUEST_PENDING) reject_request_url = reverse('admin-origin-save-request-reject', kwargs={'origin_type': origin_type, 'origin_url': origin_url}) self.check_not_login(reject_request_url) self.client.login(username=_user_name, password=_user_password) response = self.client.post(reject_request_url) self.assertEquals(response.status_code, 200) response = self.client.get(save_request_url) self.assertEquals(response.status_code, 200) self.assertEquals(response.data[0]['save_request_status'], SAVE_REQUEST_REJECTED) diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py index b80ac1119..4c714a41f 100644 --- a/swh/web/tests/api/views/test_origin_save.py +++ b/swh/web/tests/api/views/test_origin_save.py @@ -1,244 +1,252 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta from nose.tools import istest, nottest from rest_framework.test import APITestCase from unittest.mock import patch from swh.web.common.utils import reverse from swh.web.common.models import ( SaveUnauthorizedOrigin, SaveOriginRequest, SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING ) -from swh.web.common.origin_save import ( +from swh.web.common.models import ( SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_FAILED, SAVE_TASK_SUCCEED ) from swh.web.tests.testcase import SWHWebTestCase class SaveApiTestCase(SWHWebTestCase, APITestCase): @classmethod def setUpTestData(cls): SaveUnauthorizedOrigin.objects.create( url='https://github.com/user/illegal_repo') SaveUnauthorizedOrigin.objects.create( url='https://gitlab.com/user_to_exclude') @istest def invalid_origin_type(self): url = reverse('save-origin', kwargs={'origin_type': 'foo', 'origin_url': 'https://github.com/torvalds/linux'}) # noqa response = self.client.post(url) self.assertEquals(response.status_code, 400) @istest def invalid_origin_url(self): url = reverse('save-origin', kwargs={'origin_type': 'git', 'origin_url': 'bar'}) response = self.client.post(url) self.assertEquals(response.status_code, 400) @nottest def check_created_save_request_status(self, mock_scheduler, origin_url, scheduler_task_status, expected_request_status, expected_task_status=None): if not scheduler_task_status: mock_scheduler.get_tasks.return_value = [] else: mock_scheduler.get_tasks.return_value = \ [{ 'priority': 'high', 'policy': 'oneshot', 'type': 'origin-update-git', 'arguments': { 'kwargs': { 'repo_url': origin_url }, 'args': [] }, 'status': scheduler_task_status, 'id': 1, }] mock_scheduler.create_tasks.return_value = \ [{ 'priority': 'high', 'policy': 'oneshot', 'type': 'origin-update-git', 'arguments': { 'kwargs': { 'repo_url': origin_url }, 'args': [] }, 'status': 'next_run_not_scheduled', 'id': 1, }] url = reverse('save-origin', kwargs={'origin_type': 'git', 'origin_url': origin_url}) response = self.client.post(url) if expected_request_status != SAVE_REQUEST_REJECTED: self.assertEqual(response.status_code, 200) self.assertEqual(response.data['save_request_status'], expected_request_status) self.assertEqual(response.data['save_task_status'], expected_task_status) else: self.assertEqual(response.status_code, 403) @nottest def check_save_request_status(self, mock_scheduler, origin_url, expected_request_status, expected_task_status, scheduler_task_status='next_run_not_scheduled', # noqa visit_date=None): mock_scheduler.get_tasks.return_value = \ [{ 'priority': 'high', 'policy': 'oneshot', 'type': 'origin-update-git', 'arguments': { 'kwargs': { 'repo_url': origin_url }, 'args': [] }, 'status': scheduler_task_status, 'id': 1, }] url = reverse('save-origin', kwargs={'origin_type': 'git', 'origin_url': origin_url}) with patch('swh.web.common.origin_save._get_visit_date_for_save_request') as mock_visit_date: # noqa mock_visit_date.return_value = visit_date response = self.client.get(url) self.assertEqual(response.status_code, 200) - save_request_data = response.data[0] self.assertEqual(save_request_data['save_request_status'], expected_request_status) self.assertEqual(save_request_data['save_task_status'], expected_task_status) + # Check that save task status is still available when + # the scheduler task has been archived + mock_scheduler.get_tasks.return_value = [] + response = self.client.get(url) + self.assertEqual(response.status_code, 200) + save_request_data = response.data[0] + self.assertEqual(save_request_data['save_task_status'], + expected_task_status) + @istest @patch('swh.web.common.origin_save.scheduler') def save_request_rejected(self, mock_scheduler): origin_url = 'https://github.com/user/illegal_repo' self.check_created_save_request_status(mock_scheduler, origin_url, None, SAVE_REQUEST_REJECTED) self.check_save_request_status(mock_scheduler, origin_url, SAVE_REQUEST_REJECTED, SAVE_TASK_NOT_CREATED) @istest @patch('swh.web.common.origin_save.scheduler') def save_request_pending(self, mock_scheduler): origin_url = 'https://unkwownforge.com/user/repo' self.check_created_save_request_status(mock_scheduler, origin_url, None, SAVE_REQUEST_PENDING, SAVE_TASK_NOT_CREATED) self.check_save_request_status(mock_scheduler, origin_url, SAVE_REQUEST_PENDING, SAVE_TASK_NOT_CREATED) @istest @patch('swh.web.common.origin_save.scheduler') def save_request_succeed(self, mock_scheduler): origin_url = 'https://github.com/Kitware/CMake' self.check_created_save_request_status(mock_scheduler, origin_url, None, SAVE_REQUEST_ACCEPTED, SAVE_TASK_NOT_YET_SCHEDULED) self.check_save_request_status(mock_scheduler, origin_url, SAVE_REQUEST_ACCEPTED, SAVE_TASK_SCHEDULED, scheduler_task_status='next_run_scheduled') # noqa self.check_save_request_status(mock_scheduler, origin_url, SAVE_REQUEST_ACCEPTED, SAVE_TASK_SCHEDULED, scheduler_task_status='completed', visit_date=None) # noqa visit_date = datetime.now() + timedelta(hours=1) self.check_save_request_status(mock_scheduler, origin_url, SAVE_REQUEST_ACCEPTED, SAVE_TASK_SUCCEED, scheduler_task_status='completed', visit_date=visit_date) # noqa @istest @patch('swh.web.common.origin_save.scheduler') def save_request_failed(self, mock_scheduler): origin_url = 'https://gitlab.com/inkscape/inkscape' self.check_created_save_request_status(mock_scheduler, origin_url, None, SAVE_REQUEST_ACCEPTED, SAVE_TASK_NOT_YET_SCHEDULED) self.check_save_request_status(mock_scheduler, origin_url, SAVE_REQUEST_ACCEPTED, SAVE_TASK_SCHEDULED, scheduler_task_status='next_run_scheduled') # noqa self.check_save_request_status(mock_scheduler, origin_url, SAVE_REQUEST_ACCEPTED, SAVE_TASK_FAILED, scheduler_task_status='disabled') # noqa @istest @patch('swh.web.common.origin_save.scheduler') def create_save_request_only_when_needed(self, mock_scheduler): origin_url = 'https://gitlab.com/webpack/webpack' SaveOriginRequest.objects.create(origin_type='git', origin_url=origin_url, status=SAVE_REQUEST_ACCEPTED, # noqa loading_task_id=56) self.check_created_save_request_status(mock_scheduler, origin_url, 'next_run_not_scheduled', SAVE_REQUEST_ACCEPTED, SAVE_TASK_NOT_YET_SCHEDULED) sors = list(SaveOriginRequest.objects.filter(origin_type='git', origin_url=origin_url)) self.assertEquals(len(sors), 1) self.check_created_save_request_status(mock_scheduler, origin_url, 'next_run_scheduled', SAVE_REQUEST_ACCEPTED, SAVE_TASK_SCHEDULED) sors = list(SaveOriginRequest.objects.filter(origin_type='git', origin_url=origin_url)) self.assertEquals(len(sors), 1) self.check_created_save_request_status(mock_scheduler, origin_url, 'completed', SAVE_REQUEST_ACCEPTED, SAVE_TASK_NOT_YET_SCHEDULED) sors = list(SaveOriginRequest.objects.filter(origin_type='git', origin_url=origin_url)) self.assertEquals(len(sors), 2) self.check_created_save_request_status(mock_scheduler, origin_url, 'disabled', SAVE_REQUEST_ACCEPTED, SAVE_TASK_NOT_YET_SCHEDULED) sors = list(SaveOriginRequest.objects.filter(origin_type='git', origin_url=origin_url)) self.assertEquals(len(sors), 3)