diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -16,7 +16,7 @@ from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.validators import URLValidator -from django.db.models import QuerySet +from django.db.models import Q, QuerySet from django.utils.html import escape from swh.scheduler.utils import create_oneshot_task_dict @@ -611,12 +611,11 @@ pivot_date = datetime.now(tz=timezone.utc) - timedelta(days=MAX_THRESHOLD_DAYS) save_requests = SaveOriginRequest.objects.filter( # Retrieve accepted request statuses (all statuses) - status=SAVE_REQUEST_ACCEPTED, + Q(status=SAVE_REQUEST_ACCEPTED), # those without the required information we need to update - visit_date__isnull=True, - visit_status__isnull=True, + Q(visit_date__isnull=True) | Q(visit_status__isnull=True), # limit results to recent ones (that is roughly 30 days old at best) - request_date__gte=pivot_date, + Q(request_date__gte=pivot_date), ) return ( update_save_origin_requests_from_queryset(save_requests) diff --git a/swh/web/tests/common/test_origin_save.py b/swh/web/tests/common/test_origin_save.py --- a/swh/web/tests/common/test_origin_save.py +++ b/swh/web/tests/common/test_origin_save.py @@ -78,7 +78,11 @@ def _mock_scheduler( - mocker, task_status="completed", task_run_status="eventful", task_archived=False + mocker, + task_status="completed", + task_run_status="eventful", + task_archived=False, + visit_started_date=None, ): mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") task = { @@ -100,7 +104,7 @@ "id": 654270631, "metadata": {}, "scheduled": datetime.now(tz=timezone.utc), - "started": None, + "started": visit_started_date, "status": task_run_status, "task": _task_id, } @@ -518,6 +522,85 @@ assert sors[0]["visit_status"] is None +@pytest.mark.django_db +def test_refresh_in_progress_save_request_statuses(mocker, api_client, archive_data): + 
"""Refresh pending save origin requests and update them if their status changes + """ + date_now = datetime.now(tz=timezone.utc) + date_pivot = date_now - timedelta(days=30) + visit_started_date = date_now - timedelta(minutes=1) + + # returned visit status + sors = _get_save_origin_requests( + mocker, load_status=SAVE_TASK_SCHEDULED, visit_status="created", + ) + assert len(sors) == 1 + + # make the scheduler return a running event + _mock_scheduler( + mocker, + task_status="next_run_scheduled", + task_run_status="started", + visit_started_date=visit_started_date, + ) + + # The visit is detected but still running + sors = refresh_save_origin_request_statuses() + assert len(sors) == 1 + + for sor in sors: + assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot + # The status is updated + assert sor["save_task_status"] == SAVE_TASK_RUNNING + # but the visit date is still missing and the visit status is unchanged + assert sor["visit_date"] is None + assert sor["visit_status"] == "created" + + # make the visit status completed + # make the scheduler return a completed task with an eventful run + _mock_scheduler( + mocker, + task_status="completed", + task_run_status="eventful", + visit_started_date=visit_started_date, + ) + + # This time around, the origin returned will have all information updated + mock_get_origin_visits = mocker.patch( + "swh.web.common.origin_save.get_origin_visits" + ) + # create a visit for the save request with status full + visit_date = datetime.now(tz=timezone.utc).isoformat() + visit_info = OriginVisitInfo( + date=visit_date, + formatted_date="", + metadata={}, + origin=_origin_url, + snapshot="", # make mypy happy + status="full", + type=_visit_type, + url="", + visit=34, + ) + mock_get_origin_visits.return_value = [visit_info] + + # Detected entry, this time it should be updated + sors = refresh_save_origin_request_statuses() + assert len(sors) == 1 + + for sor in sors: + assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot + # as it turns out, in 
this test, the scheduler now reports a completed task, so + # the task status and visit information get updated + assert sor["save_task_status"] == SAVE_TASK_SUCCEEDED + assert sor["visit_date"] == visit_date + assert sor["visit_status"] == "full" + + # Once in a final state, a save origin request should not be updated anymore + sors = refresh_save_origin_request_statuses() + assert len(sors) == 0 + + @pytest.mark.django_db def test_refresh_save_request_statuses(mocker, api_client, archive_data): """Refresh filters save origins requests and update if changes