Page MenuHomeSoftware Heritage

D5858.id21020.diff
No OneTemporary

D5858.id21020.diff

diff --git a/conftest.py b/conftest.py
--- a/conftest.py
+++ b/conftest.py
@@ -1 +1 @@
-pytest_plugins = ["swh.auth.pytest_plugin"]
+pytest_plugins = ["swh.auth.pytest_plugin", "swh.scheduler.pytest_plugin"]
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -9,6 +9,7 @@
requests-mock != 1.9.0, != 1.9.1
swh.core[http] >= 0.0.95
swh.loader.git >= 0.8.0
+swh-scheduler[testing] >= 0.5.0
swh.storage >= 0.1.1
types-docutils
types-pyyaml
diff --git a/swh/web/common/management/commands/refresh_savecodenow_statuses.py b/swh/web/common/management/commands/refresh_savecodenow_statuses.py
--- a/swh/web/common/management/commands/refresh_savecodenow_statuses.py
+++ b/swh/web/common/management/commands/refresh_savecodenow_statuses.py
@@ -5,14 +5,45 @@
from django.core.management.base import BaseCommand
+from swh.scheduler.model import ListedOrigin
from swh.web.common.origin_save import refresh_save_origin_request_statuses
+from swh.web.config import scheduler as get_scheduler
class Command(BaseCommand):
help = "Refresh save code now origin request statuses periodically"
def handle(self, *args, **options):
+ """Refresh origin save code now requests.
+
+ For svn, git and hg origins whose visit ended as partial or full, this also
+ records them in the scheduler as recurring origins to visit.
+
+ """
refreshed_statuses = refresh_save_origin_request_statuses()
+ scheduler = get_scheduler()
+
+ # then schedule the origins with meaningful status and type to be ingested
+ # regularly
+ lister = scheduler.get_or_create_lister(
+ name="save-code-now", instance_name="host"
+ ) # FIXME: retrieve the archive instance name
+
+ listed_origins = []
+ for status in refreshed_statuses:
+ visit_type = status["visit_type"]
+ if visit_type == "archives": # only deal with git, svn, hg
+ continue
+ if status["visit_status"] not in ("partial", "full"):
+ continue
+ listed_origins.append(
+ ListedOrigin(
+ lister_id=lister.id, visit_type=visit_type, url=status["origin_url"]
+ )
+ )
+
+ if listed_origins:
+ scheduler.record_listed_origins(listed_origins)
if len(refreshed_statuses) > 0:
msg = f"Successfully updated {len(refreshed_statuses)} save request(s)."
diff --git a/swh/web/tests/common/test_django_command.py b/swh/web/tests/common/test_django_command.py
--- a/swh/web/tests/common/test_django_command.py
+++ b/swh/web/tests/common/test_django_command.py
@@ -3,28 +3,55 @@
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from datetime import datetime, timedelta, timezone
from io import StringIO
import pytest
from django.core.management import call_command
+from swh.core.api.classes import stream_results
+from swh.web.common.models import (
+ SAVE_REQUEST_ACCEPTED,
+ SAVE_TASK_FAILED,
+ SAVE_TASK_SCHEDULED,
+ SAVE_TASK_SUCCEEDED,
+ VISIT_STATUS_FAILED,
+ VISIT_STATUS_FULL,
+ VISIT_STATUS_PARTIAL,
+)
+from swh.web.common.typing import SaveOriginRequestInfo
-@pytest.mark.parametrize("nb_results", [0, 10, 20])
-def test_command_refresh__with_statuses_refreshed(mocker, nb_results):
- """Refresh status command reported updated non-terminal statuses.
+MODULE_FQDN = "swh.web.common.management.commands"
+COMMAND_NAME = "refresh_savecodenow_statuses"
- """
- command_name = "refresh_savecodenow_statuses"
- module_fqdn = "swh.web.common.management.commands"
+AUTHORIZED_ORIGIN_URL = "https://scm.ourproject.org/anonscm/%s"
+
+
+@pytest.fixture
+def mock_scheduler_and_refresh(mocker, swh_scheduler):
mock_refresh = mocker.patch(
- f"{module_fqdn}.{command_name}.refresh_save_origin_request_statuses"
+ f"{MODULE_FQDN}.{COMMAND_NAME}.refresh_save_origin_request_statuses"
)
- # fake returned refreshed status
- mock_refresh.return_value = [{"": ""}] * nb_results
+ mock_scheduler = mocker.patch(f"{MODULE_FQDN}.{COMMAND_NAME}.get_scheduler")
+ mock_scheduler.return_value = swh_scheduler
+
+ return mock_scheduler, mock_refresh
+
+
+@pytest.mark.parametrize("nb_results", [0, 10, 20])
+def test_command_refresh__with_statuses_refreshed(
+ mock_scheduler_and_refresh, nb_results
+):
+ """Refresh status command reports updates of non-terminal statuses.
+
+ """
+ mock_scheduler, mock_refresh = mock_scheduler_and_refresh
+ # fake returned refreshed status for 'archives' visit type
+ mock_refresh.return_value = [{"visit_type": "archives",}] * nb_results
out = StringIO()
- call_command(command_name, stdout=out)
+ call_command(COMMAND_NAME, stdout=out)
assert mock_refresh.called
@@ -33,3 +60,95 @@
assert f"updated {nb_results}" in actual_output
else:
assert "Nothing" in actual_output
+
+
+@pytest.fixture
+def fake_refreshed_data():
+ """Build fake refreshed save request infos (some schedulable, some ignored)
+
+ """
+ entries = [
+ {
+ "visit_type": "archives", # ignored from recurring task scheduling
+ "visit_status": VISIT_STATUS_FULL,
+ "task_status": SAVE_TASK_SUCCEEDED,
+ },
+ {
+ "visit_type": "git", # scheduled as recurring task
+ "visit_status": VISIT_STATUS_FULL,
+ "task_status": SAVE_TASK_SUCCEEDED,
+ },
+ {
+ "visit_type": "svn", # scheduled as recurring task
+ "visit_status": VISIT_STATUS_FULL,
+ "task_status": SAVE_TASK_SUCCEEDED,
+ },
+ {
+ "visit_type": "hg", # scheduled as recurring task
+ "visit_status": VISIT_STATUS_PARTIAL,
+ "task_status": SAVE_TASK_FAILED,
+ },
+ {
+ "visit_type": "svn", # ignored from recurring task scheduling
+ "visit_status": VISIT_STATUS_FAILED,
+ "task_status": SAVE_TASK_FAILED,
+ },
+ {
+ "visit_type": "hg",
+ "visit_status": "created", # ignored from recurring task scheduling
+ "task_status": SAVE_TASK_SCHEDULED,
+ },
+ ]
+ time_now = datetime.now(tz=timezone.utc) - timedelta(days=len(entries))
+ return [
+ SaveOriginRequestInfo(
+ visit_type=meta["visit_type"],
+ visit_status=meta["visit_status"],
+ origin_url=AUTHORIZED_ORIGIN_URL % i,
+ save_request_date=time_now + timedelta(days=i - 1),
+ save_request_status=SAVE_REQUEST_ACCEPTED,
+ visit_date=time_now + timedelta(days=i),
+ save_task_status=meta["task_status"],
+ id=i,
+ loading_task_id=i,
+ )
+ for i, meta in enumerate(entries)
+ ]
+
+
+def test_command_refresh__with_recurrent_tasks_scheduling(
+ mock_scheduler_and_refresh, fake_refreshed_data, swh_scheduler
+):
+ """Refresh status command reports status updates. The successful ones whose visit
+ type is not 'archives' are also scheduled as recurring visits.
+
+ """
+ mock_scheduler, mock_refresh = mock_scheduler_and_refresh
+ mock_refresh.return_value = fake_refreshed_data
+
+ # only visit types (git, hg, svn) with status (full, partial) are taken into
+ # account for scheduling, so only 3 entries match in the fake data set.
+ expected_nb_scheduled = 0
+ for entry in fake_refreshed_data:
+ if entry["visit_type"] == "archives": # only deal with git, svn, hg
+ continue
+ if entry["visit_status"] not in ("partial", "full"):
+ continue
+ expected_nb_scheduled += 1
+
+ assert expected_nb_scheduled == 3
+
+ out = StringIO()
+ call_command(COMMAND_NAME, stdout=out)
+
+ actual_output = out.getvalue()
+ assert f"Successfully updated {len(fake_refreshed_data)}" in actual_output
+
+ lister = swh_scheduler.get_or_create_lister(
+ name="save-code-now", instance_name="host"
+ )
+
+ result = list(stream_results(swh_scheduler.get_listed_origins, lister.id))
+ assert len(result) == expected_nb_scheduled
+
+ assert mock_scheduler.called

File Metadata

Mime Type
text/plain
Expires
Dec 17 2024, 7:38 AM (13 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3233613

Event Timeline