diff --git a/conftest.py b/conftest.py
--- a/conftest.py
+++ b/conftest.py
@@ -1 +1 @@
-pytest_plugins = ["swh.auth.pytest_plugin"]
+pytest_plugins = ["swh.auth.pytest_plugin", "swh.scheduler.pytest_plugin"]
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -9,6 +9,7 @@
 requests-mock != 1.9.0, != 1.9.1
 swh.core[http] >= 0.0.95
 swh.loader.git >= 0.8.0
+swh-scheduler[testing] >= 0.5.0
 swh.storage >= 0.1.1
 types-docutils
 types-pyyaml
diff --git a/swh/web/common/management/commands/refresh_savecodenow_statuses.py b/swh/web/common/management/commands/refresh_savecodenow_statuses.py
--- a/swh/web/common/management/commands/refresh_savecodenow_statuses.py
+++ b/swh/web/common/management/commands/refresh_savecodenow_statuses.py
@@ -5,14 +5,45 @@
 
 from django.core.management.base import BaseCommand
 
+from swh.scheduler.model import ListedOrigin
 from swh.web.common.origin_save import refresh_save_origin_request_statuses
+from swh.web.config import scheduler as get_scheduler
 
 
 class Command(BaseCommand):
     help = "Refresh save code now origin request statuses periodically"
 
     def handle(self, *args, **options):
+        """Refresh the statuses of the save code now requests.
+
+        Requests with a 'partial' or 'full' visit status and a visit type other than
+        'archives' are also recorded as listed origins of the 'save-code-now' lister.
+
+        """
         refreshed_statuses = refresh_save_origin_request_statuses()
+        scheduler = get_scheduler()
+
+        # then schedule the origins with meaningful status and type to be ingested
+        # regularly
+        lister = scheduler.get_or_create_lister(
+            name="save-code-now", instance_name="host"
+        )  # FIXME: retrieve the archive instance name
+
+        listed_origins = []
+        for status in refreshed_statuses:
+            visit_type = status["visit_type"]
+            if visit_type == "archives":  # only deal with git, svn, hg
+                continue
+            if status["visit_status"] not in ("partial", "full"):
+                continue
+            listed_origins.append(
+                ListedOrigin(
+                    lister_id=lister.id, visit_type=visit_type, url=status["origin_url"]
+                )
+            )
+
+        if listed_origins:
+            scheduler.record_listed_origins(listed_origins)
 
         if len(refreshed_statuses) > 0:
             msg = f"Successfully updated {len(refreshed_statuses)} save request(s)."
diff --git a/swh/web/tests/common/test_django_command.py b/swh/web/tests/common/test_django_command.py
--- a/swh/web/tests/common/test_django_command.py
+++ b/swh/web/tests/common/test_django_command.py
@@ -3,28 +3,55 @@
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from datetime import datetime, timedelta, timezone
 from io import StringIO
 
 import pytest
 
 from django.core.management import call_command
 
+from swh.core.api.classes import stream_results
+from swh.web.common.models import (
+    SAVE_REQUEST_ACCEPTED,
+    SAVE_TASK_FAILED,
+    SAVE_TASK_SCHEDULED,
+    SAVE_TASK_SUCCEEDED,
+    VISIT_STATUS_FAILED,
+    VISIT_STATUS_FULL,
+    VISIT_STATUS_PARTIAL,
+)
+from swh.web.common.typing import SaveOriginRequestInfo
 
-@pytest.mark.parametrize("nb_results", [0, 10, 20])
-def test_command_refresh__with_statuses_refreshed(mocker, nb_results):
-    """Refresh status command reported updated non-terminal statuses.
+MODULE_FQDN = "swh.web.common.management.commands"
+COMMAND_NAME = "refresh_savecodenow_statuses"
 
-    """
-    command_name = "refresh_savecodenow_statuses"
-    module_fqdn = "swh.web.common.management.commands"
+AUTHORIZED_ORIGIN_URL = "https://scm.ourproject.org/anonscm/%s"
+
+
+@pytest.fixture
+def mock_scheduler_and_refresh(mocker, swh_scheduler):
     mock_refresh = mocker.patch(
-        f"{module_fqdn}.{command_name}.refresh_save_origin_request_statuses"
+        f"{MODULE_FQDN}.{COMMAND_NAME}.refresh_save_origin_request_statuses"
     )
-    # fake returned refreshed status
-    mock_refresh.return_value = [{"": ""}] * nb_results
+    mock_scheduler = mocker.patch(f"{MODULE_FQDN}.{COMMAND_NAME}.get_scheduler")
+    mock_scheduler.return_value = swh_scheduler
+
+    return mock_scheduler, mock_refresh
+
+
+@pytest.mark.parametrize("nb_results", [0, 10, 20])
+def test_command_refresh__with_statuses_refreshed(
+    mock_scheduler_and_refresh, nb_results
+):
+    """Refresh status command reports non-terminal statuses updates.
+
+    """
+    mock_scheduler, mock_refresh = mock_scheduler_and_refresh
+    # fake returned refreshed status for 'archives' visit type
+    mock_refresh.return_value = [{"visit_type": "archives",}] * nb_results
 
     out = StringIO()
-    call_command(command_name, stdout=out)
+    call_command(COMMAND_NAME, stdout=out)
 
     assert mock_refresh.called
 
@@ -33,3 +60,95 @@
         assert f"updated {nb_results}" in actual_output
     else:
         assert "Nothing" in actual_output
+
+
+@pytest.fixture
+def fake_refreshed_data():
+    """Build the list of refreshed save requests returned by the mocked refresh call
+
+    """
+    entries = [
+        {
+            "visit_type": "archives",  # ignored from recurring task scheduling
+            "visit_status": VISIT_STATUS_FULL,
+            "task_status": SAVE_TASK_SUCCEEDED,
+        },
+        {
+            "visit_type": "git",  # scheduled as recurring task
+            "visit_status": VISIT_STATUS_FULL,
+            "task_status": SAVE_TASK_SUCCEEDED,
+        },
+        {
+            "visit_type": "svn",  # scheduled as recurring task
+            "visit_status": VISIT_STATUS_FULL,
+            "task_status": SAVE_TASK_SUCCEEDED,
+        },
+        {
+            "visit_type": "hg",  # scheduled as recurring task
+            "visit_status": VISIT_STATUS_PARTIAL,
+            "task_status": SAVE_TASK_FAILED,
+        },
+        {
+            "visit_type": "svn",  # ignored from recurring task scheduling
+            "visit_status": VISIT_STATUS_FAILED,
+            "task_status": SAVE_TASK_FAILED,
+        },
+        {
+            "visit_type": "hg",
+            "visit_status": "created",  # ignored from recurring task scheduling
+            "task_status": SAVE_TASK_SCHEDULED,
+        },
+    ]
+    time_now = datetime.now(tz=timezone.utc) - timedelta(days=len(entries))
+    return [
+        SaveOriginRequestInfo(
+            visit_type=meta["visit_type"],
+            visit_status=meta["visit_status"],
+            origin_url=AUTHORIZED_ORIGIN_URL % i,
+            save_request_date=time_now + timedelta(days=i - 1),
+            save_request_status=SAVE_REQUEST_ACCEPTED,
+            visit_date=time_now + timedelta(days=i),
+            save_task_status=meta["task_status"],
+            id=i,
+            loading_task_id=i,
+        )
+        for i, meta in enumerate(entries)
+    ]
+
+
+def test_command_refresh__with_recurrent_tasks_scheduling(
+    mock_scheduler_and_refresh, fake_refreshed_data, swh_scheduler
+):
+    """Refresh status command reports the updated statuses. Requests with a partial or
+    full visit status and a type other than 'archives' are recorded as listed origins.
+
+    """
+    mock_scheduler, mock_refresh = mock_scheduler_and_refresh
+    mock_refresh.return_value = fake_refreshed_data
+
+    # only (git, hg, svn) visit types with a (full, partial) status are taken into
+    # account for scheduling, so only 3 entries of the fake data set match.
+    expected_nb_scheduled = 0
+    for entry in fake_refreshed_data:
+        if entry["visit_type"] == "archives":  # only deal with git, svn, hg
+            continue
+        if entry["visit_status"] not in ("partial", "full"):
+            continue
+        expected_nb_scheduled += 1
+
+    assert expected_nb_scheduled == 3
+
+    out = StringIO()
+    call_command(COMMAND_NAME, stdout=out)
+
+    actual_output = out.getvalue()
+    assert f"Successfully updated {len(fake_refreshed_data)}" in actual_output
+
+    lister = swh_scheduler.get_or_create_lister(
+        name="save-code-now", instance_name="host"
+    )
+
+    result = list(stream_results(swh_scheduler.get_listed_origins, lister.id))
+    assert len(result) == expected_nb_scheduled
+
+    assert mock_scheduler.called