Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122851
D5858.id21020.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
D5858.id21020.diff
View Options
diff --git a/conftest.py b/conftest.py
--- a/conftest.py
+++ b/conftest.py
@@ -1 +1 @@
-pytest_plugins = ["swh.auth.pytest_plugin"]
+pytest_plugins = ["swh.auth.pytest_plugin", "swh.scheduler.pytest_plugin"]
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -9,6 +9,7 @@
requests-mock != 1.9.0, != 1.9.1
swh.core[http] >= 0.0.95
swh.loader.git >= 0.8.0
+swh-scheduler[testing] >= 0.5.0
swh.storage >= 0.1.1
types-docutils
types-pyyaml
diff --git a/swh/web/common/management/commands/refresh_savecodenow_statuses.py b/swh/web/common/management/commands/refresh_savecodenow_statuses.py
--- a/swh/web/common/management/commands/refresh_savecodenow_statuses.py
+++ b/swh/web/common/management/commands/refresh_savecodenow_statuses.py
@@ -5,14 +5,45 @@
from django.core.management.base import BaseCommand
+from swh.scheduler.model import ListedOrigin
from swh.web.common.origin_save import refresh_save_origin_request_statuses
+from swh.web.config import scheduler as get_scheduler
class Command(BaseCommand):
help = "Refresh save code now origin request statuses periodically"
def handle(self, *args, **options):
+ """Refresh origin save code now requests.
+
+ For the svn, git and hg visit types, this also registers the origins as
+ recurring origins to visit.
+
+ """
refreshed_statuses = refresh_save_origin_request_statuses()
+ scheduler = get_scheduler()
+
+ # then schedule the origins with meaningful status and type to be ingested
+ # regularly
+ lister = scheduler.get_or_create_lister(
+ name="save-code-now", instance_name="host"
+ ) # FIXME: retrieve the archive instance name
+
+ listed_origins = []
+ for status in refreshed_statuses:
+ visit_type = status["visit_type"]
+ if visit_type == "archives": # only deal with git, svn, hg
+ continue
+ if status["visit_status"] not in ("partial", "full"):
+ continue
+ listed_origins.append(
+ ListedOrigin(
+ lister_id=lister.id, visit_type=visit_type, url=status["origin_url"]
+ )
+ )
+
+ if listed_origins:
+ scheduler.record_listed_origins(listed_origins)
if len(refreshed_statuses) > 0:
msg = f"Successfully updated {len(refreshed_statuses)} save request(s)."
diff --git a/swh/web/tests/common/test_django_command.py b/swh/web/tests/common/test_django_command.py
--- a/swh/web/tests/common/test_django_command.py
+++ b/swh/web/tests/common/test_django_command.py
@@ -3,28 +3,55 @@
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from datetime import datetime, timedelta, timezone
from io import StringIO
import pytest
from django.core.management import call_command
+from swh.core.api.classes import stream_results
+from swh.web.common.models import (
+ SAVE_REQUEST_ACCEPTED,
+ SAVE_TASK_FAILED,
+ SAVE_TASK_SCHEDULED,
+ SAVE_TASK_SUCCEEDED,
+ VISIT_STATUS_FAILED,
+ VISIT_STATUS_FULL,
+ VISIT_STATUS_PARTIAL,
+)
+from swh.web.common.typing import SaveOriginRequestInfo
-@pytest.mark.parametrize("nb_results", [0, 10, 20])
-def test_command_refresh__with_statuses_refreshed(mocker, nb_results):
- """Refresh status command reported updated non-terminal statuses.
+MODULE_FQDN = "swh.web.common.management.commands"
+COMMAND_NAME = "refresh_savecodenow_statuses"
- """
- command_name = "refresh_savecodenow_statuses"
- module_fqdn = "swh.web.common.management.commands"
+AUTHORIZED_ORIGIN_URL = "https://scm.ourproject.org/anonscm/%s"
+
+
+@pytest.fixture
+def mock_scheduler_and_refresh(mocker, swh_scheduler):
mock_refresh = mocker.patch(
- f"{module_fqdn}.{command_name}.refresh_save_origin_request_statuses"
+ f"{MODULE_FQDN}.{COMMAND_NAME}.refresh_save_origin_request_statuses"
)
- # fake returned refreshed status
- mock_refresh.return_value = [{"": ""}] * nb_results
+ mock_scheduler = mocker.patch(f"{MODULE_FQDN}.{COMMAND_NAME}.get_scheduler")
+ mock_scheduler.return_value = swh_scheduler
+
+ return mock_scheduler, mock_refresh
+
+
+@pytest.mark.parametrize("nb_results", [0, 10, 20])
+def test_command_refresh__with_statuses_refreshed(
+ mock_scheduler_and_refresh, nb_results
+):
+ """Refresh status command reports non-terminal statuses updates.
+
+ """
+ mock_scheduler, mock_refresh = mock_scheduler_and_refresh
+ # fake returned refreshed status for 'archives' visit type
+ mock_refresh.return_value = [{"visit_type": "archives",}] * nb_results
out = StringIO()
- call_command(command_name, stdout=out)
+ call_command(COMMAND_NAME, stdout=out)
assert mock_refresh.called
@@ -33,3 +60,95 @@
assert f"updated {nb_results}" in actual_output
else:
assert "Nothing" in actual_output
+
+
+@pytest.fixture
+def fake_refreshed_data():
+ """Build fake refreshed save request statuses, both schedulable and ignored ones
+
+ """
+ entries = [
+ {
+ "visit_type": "archives", # ignored from recurring task scheduling
+ "visit_status": VISIT_STATUS_FULL,
+ "task_status": SAVE_TASK_SUCCEEDED,
+ },
+ {
+ "visit_type": "git", # scheduled as recurring task
+ "visit_status": VISIT_STATUS_FULL,
+ "task_status": SAVE_TASK_SUCCEEDED,
+ },
+ {
+ "visit_type": "svn", # scheduled as recurring task
+ "visit_status": VISIT_STATUS_FULL,
+ "task_status": SAVE_TASK_SUCCEEDED,
+ },
+ {
+ "visit_type": "hg", # scheduled as recurring task
+ "visit_status": VISIT_STATUS_PARTIAL,
+ "task_status": SAVE_TASK_FAILED,
+ },
+ {
+ "visit_type": "svn", # ignored from recurring task scheduling
+ "visit_status": VISIT_STATUS_FAILED,
+ "task_status": SAVE_TASK_FAILED,
+ },
+ {
+ "visit_type": "hg",
+ "visit_status": "created", # ignored from recurring task scheduling
+ "task_status": SAVE_TASK_SCHEDULED,
+ },
+ ]
+ time_now = datetime.now(tz=timezone.utc) - timedelta(days=len(entries))
+ return [
+ SaveOriginRequestInfo(
+ visit_type=meta["visit_type"],
+ visit_status=meta["visit_status"],
+ origin_url=AUTHORIZED_ORIGIN_URL % i,
+ save_request_date=time_now + timedelta(days=i - 1),
+ save_request_status=SAVE_REQUEST_ACCEPTED,
+ visit_date=time_now + timedelta(days=i),
+ save_task_status=meta["task_status"],
+ id=i,
+ loading_task_id=i,
+ )
+ for i, meta in enumerate(entries)
+ ]
+
+
+def test_command_refresh__with_recurrent_tasks_scheduling(
+ mock_scheduler_and_refresh, fake_refreshed_data, swh_scheduler
+):
+ """Refresh status command report updates of statuses. The successful ones without the
+ type 'archives' are also scheduled as recurring origins.
+
+ """
+ mock_scheduler, mock_refresh = mock_scheduler_and_refresh
+ mock_refresh.return_value = fake_refreshed_data
+
+ # only visit types (git, hg, svn) with visit status (full, partial) are taken into
+ # account for scheduling, so only 3 entries of the fake data set match.
+ expected_nb_scheduled = 0
+ for entry in fake_refreshed_data:
+ if entry["visit_type"] == "archives": # only deal with git, svn, hg
+ continue
+ if entry["visit_status"] not in ("partial", "full"):
+ continue
+ expected_nb_scheduled += 1
+
+ assert expected_nb_scheduled == 3
+
+ out = StringIO()
+ call_command(COMMAND_NAME, stdout=out)
+
+ actual_output = out.getvalue()
+ assert f"Successfully updated {len(fake_refreshed_data)}" in actual_output
+
+ lister = swh_scheduler.get_or_create_lister(
+ name="save-code-now", instance_name="host"
+ )
+
+ result = list(stream_results(swh_scheduler.get_listed_origins, lister.id))
+ assert len(result) == expected_nb_scheduled
+
+ assert mock_scheduler.called
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 17 2024, 7:38 AM (13 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3233613
Attached To
D5858: Schedule save code now as recurring origins to ingest when successful
Event Timeline
Log In to Comment