diff --git a/swh/deposit/cli/admin.py b/swh/deposit/cli/admin.py
--- a/swh/deposit/cli/admin.py
+++ b/swh/deposit/cli/admin.py
@@ -141,7 +141,7 @@
         click.echo('User %s exists.' % username)
         ctx.exit(0)
     except DepositClient.DoesNotExist:
-        click.echo('User %s does not exists.' % username)
+        click.echo('User %s does not exist.' % username)
         ctx.exit(1)
 
 
@@ -176,3 +176,79 @@
     else:
         output = '\n'.join((col.name for col in collections))
         click.echo(output)
+
+
+@admin.group('deposit')
+@click.pass_context
+def deposit(ctx):
+    """Manipulate deposit."""
+    pass
+
+
+@deposit.command('reschedule')
+@click.option('--deposit-id', required=True, help="Deposit identifier")
+@click.pass_context
+def deposit_reschedule(ctx, deposit_id):
+    """Reschedule the deposit loading
+
+    This will:
+
+    - check that the deposit's status is a reasonable one (failed or done).
+      That means the checks passed alright but something went wrong during
+      the loading (failed: loading failed, done: loading ok, still for some
+      reasons as in bugs, we need to reschedule it)
+
+    - reset the deposit's status to 'verified' (prior to any loading but after
+      the checks which are fine) and remove the different archives'
+      identifiers (swh-id, ...)
+
+    - trigger back the loading task through the scheduler
+
+    """
+    # imported here to avoid loading the django namespaces too early
+    from datetime import datetime, timezone
+    from swh.deposit.models import Deposit
+    from swh.deposit.config import (
+        DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE,
+        DEPOSIT_STATUS_VERIFIED, SWHDefaultConfig,
+    )
+
+    try:
+        deposit = Deposit.objects.get(pk=deposit_id)
+    except Deposit.DoesNotExist:
+        click.echo('Deposit %s does not exist.' % deposit_id)
+        ctx.exit(1)
+
+    # Check the deposit is in a reasonable state
+    accepted_statuses = [
+        DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE
+    ]
+    if deposit.status == DEPOSIT_STATUS_VERIFIED:
+        click.echo('Deposit %s\'s status already set for rescheduling.' % (
+            deposit_id))
+        ctx.exit(0)
+
+    if deposit.status not in accepted_statuses:
+        click.echo('Deposit %s\'s status must be one of %s.' % (
+            deposit_id, ', '.join(accepted_statuses)))
+        ctx.exit(1)
+
+    task_id = deposit.load_task_id
+    if not task_id:
+        click.echo('Deposit %s cannot be rescheduled. It misses the '
+                   'associated task.' % deposit_id)
+        ctx.exit(1)
+
+    # Reset the deposit's state
+    deposit.swh_id = None
+    deposit.swh_id_context = None
+    deposit.swh_anchor_id = None
+    deposit.swh_anchor_id_context = None
+    deposit.status = DEPOSIT_STATUS_VERIFIED
+    deposit.save()
+
+    # Trigger back the deposit (aware UTC timestamp, not a naive local one)
+    scheduler = SWHDefaultConfig().scheduler
+    scheduler.set_status_tasks(
+        [task_id], status='next_run_not_scheduled',
+        next_run=datetime.now(tz=timezone.utc))
diff --git a/swh/deposit/migrations/0016_auto_20190507_1408.py b/swh/deposit/migrations/0016_auto_20190507_1408.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/migrations/0016_auto_20190507_1408.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.18 on 2019-05-07 14:08
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('deposit', '0015_depositrequest_typemigration'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='deposit',
+            name='check_task_id',
+            field=models.TextField(blank=True, null=True, verbose_name="Scheduler's associated checking task id"),
+        ),
+        migrations.AddField(
+            model_name='deposit',
+            name='load_task_id',
+            field=models.TextField(blank=True, null=True, verbose_name="Scheduler's associated loading task id"),
+        ),
+    ]
diff --git a/swh/deposit/models.py b/swh/deposit/models.py
--- a/swh/deposit/models.py
+++ b/swh/deposit/models.py
@@ -122,6 +122,14 @@
     status_detail = JSONField(null=True)
     # deposit can have one parent
     parent = models.ForeignKey('self', null=True)
+    check_task_id = models.TextField(
+        blank=True, null=True,
+        verbose_name="Scheduler's associated checking task id"
+    )
+    load_task_id = models.TextField(
+        blank=True, null=True,
+        verbose_name="Scheduler's associated loading task id"
+    )
 
     class Meta:
         db_table = 'deposit'
diff --git a/swh/deposit/signals.py b/swh/deposit/signals.py
--- a/swh/deposit/signals.py
+++ b/swh/deposit/signals.py
@@ -21,6 +21,24 @@
 from .config import DEPOSIT_STATUS_DEPOSITED
 
 
+def schedule_task(scheduler, task):
+    """Schedule the task and return its identifier
+
+    Args:
+        scheduler: Scheduler instance used to create the task
+        task (dict): Task to schedule
+
+    Returns:
+        The identifier of the created task, None if the scheduler did not
+        create any task
+
+    """
+    tasks = scheduler.create_tasks([task])
+    if not tasks:
+        return None
+    return tasks[0]['id']
+
+
 @receiver(post_save, sender=Deposit)
 def post_deposit_save(sender, instance, created, raw, using,
                       update_fields, **kwargs):
@@ -58,15 +76,27 @@
 
     args = [instance.collection.name, instance.id]
 
-    if instance.status == DEPOSIT_STATUS_DEPOSITED:
-        # schedule archive check
+    # In the following, we check that the instance.*task_id are not already
+    # populated because the `instance.save()` call also triggers a call to
+    # this very function. For the same reason, the instance is only saved
+    # when the scheduler actually returned a task identifier.
+
+    if (instance.status == DEPOSIT_STATUS_DEPOSITED and
+            not instance.check_task_id):
+        # schedule deposit's checks
         from swh.deposit.config import PRIVATE_CHECK_DEPOSIT
         check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args)
         task = create_oneshot_task_dict(
             'swh-deposit-archive-checks',
             deposit_check_url=check_url)
-    else:  # instance.status == DEPOSIT_STATUS_VERIFIED:
-        # schedule loading
+        check_task_id = schedule_task(default_config.scheduler, task)
+        if check_task_id:
+            instance.check_task_id = check_task_id
+            instance.save()
+
+    elif (instance.status == DEPOSIT_STATUS_VERIFIED and
+          not instance.load_task_id):
+        # schedule deposit loading
         from swh.deposit.config import PRIVATE_GET_RAW_CONTENT
         from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
         from swh.deposit.config import PRIVATE_PUT_DEPOSIT
@@ -80,4 +110,7 @@
             deposit_meta_url=meta_url,
             deposit_update_url=update_url)
 
-    default_config.scheduler.create_tasks([task])
+        load_task_id = schedule_task(default_config.scheduler, task)
+        if load_task_id:
+            instance.load_task_id = load_task_id
+            instance.save()