Changeset View
Changeset View
Standalone View
Standalone View
swh/web/tests/common/test_origin_save.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timedelta, timezone | from datetime import datetime, timedelta, timezone | ||||
from functools import partial | from functools import partial | ||||
import re | import re | ||||
from typing import Optional | from typing import Optional | ||||
import uuid | |||||
import iso8601 | import iso8601 | ||||
import pytest | import pytest | ||||
import requests | import requests | ||||
from swh.core.pytest_plugin import get_response_cb | from swh.core.pytest_plugin import get_response_cb | ||||
from swh.scheduler.utils import create_oneshot_task_dict | |||||
from swh.web.common.exc import BadInputExc | from swh.web.common.exc import BadInputExc | ||||
from swh.web.common.models import ( | from swh.web.common.models import ( | ||||
SAVE_REQUEST_ACCEPTED, | SAVE_REQUEST_ACCEPTED, | ||||
SAVE_TASK_FAILED, | SAVE_TASK_FAILED, | ||||
SAVE_TASK_RUNNING, | SAVE_TASK_RUNNING, | ||||
SAVE_TASK_SCHEDULED, | SAVE_TASK_SCHEDULED, | ||||
SAVE_TASK_SUCCEEDED, | SAVE_TASK_SUCCEEDED, | ||||
VISIT_STATUS_CREATED, | VISIT_STATUS_CREATED, | ||||
Show All 20 Lines | |||||
) | ) | ||||
from swh.web.config import get_config | from swh.web.config import get_config | ||||
_es_url = "http://esnode1.internal.softwareheritage.org:9200" | _es_url = "http://esnode1.internal.softwareheritage.org:9200" | ||||
_es_workers_index_url = "%s/swh_workers-*" % _es_url | _es_workers_index_url = "%s/swh_workers-*" % _es_url | ||||
_origin_url = "https://gitlab.com/inkscape/inkscape" | _origin_url = "https://gitlab.com/inkscape/inkscape" | ||||
_visit_type = "git" | _visit_type = "git" | ||||
_task_id = 203525448 | _task_id = 1 | ||||
@pytest.fixture(autouse=True) | @pytest.fixture(autouse=True) | ||||
def requests_mock_datadir(datadir, requests_mock_datadir): | def requests_mock_datadir(datadir, requests_mock_datadir): | ||||
"""Override default behavior to deal with post method""" | """Override default behavior to deal with post method""" | ||||
cb = partial(get_response_cb, datadir=datadir) | cb = partial(get_response_cb, datadir=datadir) | ||||
requests_mock_datadir.post(re.compile("https?://"), body=cb) | requests_mock_datadir.post(re.compile("https?://"), body=cb) | ||||
return requests_mock_datadir | return requests_mock_datadir | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
def test_get_save_origin_archived_task_info(mocker): | def test_get_save_origin_archived_task_info(swh_scheduler): | ||||
_get_save_origin_task_info_test(mocker, task_archived=True) | _get_save_origin_task_info_test(swh_scheduler, task_archived=True) | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
def test_get_save_origin_task_full_info_with_es(mocker): | def test_get_save_origin_task_info_without_es(swh_scheduler): | ||||
_get_save_origin_task_info_test(mocker, es_available=True) | _get_save_origin_task_info_test(swh_scheduler, es_available=False) | ||||
@pytest.mark.django_db | def _fill_scheduler_db( | ||||
def test_get_save_origin_task_info_with_es(mocker): | swh_scheduler, | ||||
_get_save_origin_task_info_test(mocker, es_available=True, full_info=False) | |||||
@pytest.mark.django_db | |||||
def test_get_save_origin_task_info_without_es(mocker): | |||||
_get_save_origin_task_info_test(mocker, es_available=False) | |||||
def _mock_scheduler( | |||||
mocker, | |||||
task_status="completed", | task_status="completed", | ||||
task_run_status="eventful", | task_run_status="eventful", | ||||
task_archived=False, | task_archived=False, | ||||
visit_started_date=None, | visit_started_date=None, | ||||
): | ): | ||||
mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") | task = task_run = None | ||||
task = { | if not task_archived: | ||||
"arguments": {"args": [], "kwargs": {"repo_url": _origin_url},}, | task = swh_scheduler.create_tasks( | ||||
"current_interval": timedelta(days=64), | [create_oneshot_task_dict("load-git", repo_url=_origin_url)] | ||||
"id": _task_id, | )[0] | ||||
"next_run": datetime.now(tz=timezone.utc) + timedelta(days=64), | backend_id = str(uuid.uuid4()) | ||||
"policy": "oneshot", | |||||
"priority": "high", | if task_status != "next_run_not_scheduled": | ||||
"retries_left": 0, | swh_scheduler.schedule_task_run(task["id"], backend_id) | ||||
"status": task_status, | |||||
"type": "load-git", | if task_run_status is not None: | ||||
} | swh_scheduler.start_task_run(backend_id) | ||||
mock_scheduler.get_tasks.return_value = [dict(task) if not task_archived else None] | task_run = dict( | ||||
swh_scheduler.end_task_run(backend_id, task_run_status).items() | |||||
task_run = { | ) | ||||
"backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205", | |||||
"ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5), | |||||
"id": 654270631, | |||||
"metadata": {}, | |||||
"scheduled": datetime.now(tz=timezone.utc), | |||||
"started": visit_started_date, | |||||
"status": task_run_status, | |||||
"task": _task_id, | |||||
} | |||||
mock_scheduler.get_task_runs.return_value = [ | |||||
dict(task_run) if not task_archived else None | |||||
] | |||||
return task, task_run | return task, task_run | ||||
@pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
"wrong_type,privileged_user", | "wrong_type,privileged_user", | ||||
[ | [ | ||||
("dummy", True), | ("dummy", True), | ||||
("dumb", False), | ("dumb", False), | ||||
("archives", False), # when no privilege, this is rejected | ("archives", False), # when no privilege, this is rejected | ||||
], | ], | ||||
) | ) | ||||
def test__check_visit_type_savable(wrong_type, privileged_user): | def test_check_visit_type_savable(wrong_type, privileged_user, swh_scheduler): | ||||
swh_scheduler.add_load_archive_task_type() | |||||
with pytest.raises(BadInputExc, match="Allowed types"): | with pytest.raises(BadInputExc, match="Allowed types"): | ||||
_check_visit_type_savable(wrong_type, privileged_user) | _check_visit_type_savable(wrong_type, privileged_user) | ||||
# when privileged_user, the following is accepted though | # when privileged_user, the following is accepted though | ||||
_check_visit_type_savable("archives", True) | _check_visit_type_savable("archives", True) | ||||
def test_get_savable_visit_types(): | def test_get_savable_visit_types(swh_scheduler): | ||||
swh_scheduler.add_load_archive_task_type() | |||||
default_list = list(_visit_type_task.keys()) | default_list = list(_visit_type_task.keys()) | ||||
assert set(get_savable_visit_types()) == set(default_list) | assert set(get_savable_visit_types()) == set(default_list) | ||||
privileged_list = default_list.copy() | privileged_list = default_list.copy() | ||||
privileged_list += list(_visit_type_task_privileged.keys()) | privileged_list += list(_visit_type_task_privileged.keys()) | ||||
assert set(get_savable_visit_types(privileged_user=True)) == set(privileged_list) | assert set(get_savable_visit_types(privileged_user=True)) == set(privileged_list) | ||||
def _get_save_origin_task_info_test( | def _get_save_origin_task_info_test( | ||||
mocker, task_archived=False, es_available=True, full_info=True | swh_scheduler, task_archived=False, es_available=True, full_info=True | ||||
): | ): | ||||
swh_web_config = get_config() | swh_web_config = get_config() | ||||
if es_available: | if es_available: | ||||
swh_web_config.update({"es_workers_index_url": _es_workers_index_url}) | swh_web_config.update({"es_workers_index_url": _es_workers_index_url}) | ||||
else: | else: | ||||
swh_web_config.update({"es_workers_index_url": ""}) | swh_web_config.update({"es_workers_index_url": ""}) | ||||
sor = SaveOriginRequest.objects.create( | sor = SaveOriginRequest.objects.create( | ||||
request_date=datetime.now(tz=timezone.utc), | request_date=datetime.now(tz=timezone.utc), | ||||
visit_type=_visit_type, | visit_type=_visit_type, | ||||
origin_url="https://gitlab.com/inkscape/inkscape", | origin_url="https://gitlab.com/inkscape/inkscape", | ||||
status=SAVE_REQUEST_ACCEPTED, | status=SAVE_REQUEST_ACCEPTED, | ||||
visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1), | visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1), | ||||
loading_task_id=_task_id, | loading_task_id=_task_id, | ||||
) | ) | ||||
task, task_run = _mock_scheduler(mocker, task_archived=task_archived) | task, task_run = _fill_scheduler_db(swh_scheduler, task_archived=task_archived) | ||||
es_response = requests.post("%s/_search" % _es_workers_index_url).json() | es_response = requests.post("%s/_search" % _es_workers_index_url).json() | ||||
task_exec_data = es_response["hits"]["hits"][-1]["_source"] | task_exec_data = es_response["hits"]["hits"][-1]["_source"] | ||||
sor_task_info = get_save_origin_task_info(sor.id, full_info=full_info) | sor_task_info = get_save_origin_task_info(sor.id, full_info=full_info) | ||||
expected_result = ( | expected_result = ( | ||||
Show All 34 Lines | if not full_info: | ||||
message += f"{line}\n" | message += f"{line}\n" | ||||
message += message_lines[-1] | message += message_lines[-1] | ||||
expected_result["message"] = message | expected_result["message"] = message | ||||
assert sor_task_info == expected_result | assert sor_task_info == expected_result | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
def test_get_save_origin_requests_find_visit_date(mocker): | def test_get_save_origin_requests_find_visit_date(mocker, swh_scheduler): | ||||
# create a save request | # create a save request | ||||
SaveOriginRequest.objects.create( | SaveOriginRequest.objects.create( | ||||
request_date=datetime.now(tz=timezone.utc), | request_date=datetime.now(tz=timezone.utc), | ||||
visit_type=_visit_type, | visit_type=_visit_type, | ||||
origin_url=_origin_url, | origin_url=_origin_url, | ||||
status=SAVE_REQUEST_ACCEPTED, | status=SAVE_REQUEST_ACCEPTED, | ||||
visit_date=None, | visit_date=None, | ||||
loading_task_id=_task_id, | loading_task_id=_task_id, | ||||
) | ) | ||||
# mock scheduler and archive | # mock scheduler and archive | ||||
_mock_scheduler(mocker) | _fill_scheduler_db(swh_scheduler) | ||||
mock_archive = mocker.patch("swh.web.common.origin_save.archive") | mock_archive = mocker.patch("swh.web.common.origin_save.archive") | ||||
mock_archive.lookup_origin.return_value = {"url": _origin_url} | mock_archive.lookup_origin.return_value = {"url": _origin_url} | ||||
mock_get_origin_visits = mocker.patch( | mock_get_origin_visits = mocker.patch( | ||||
"swh.web.common.origin_save.get_origin_visits" | "swh.web.common.origin_save.get_origin_visits" | ||||
) | ) | ||||
# create a visit for the save request | # create a visit for the save request | ||||
visit_date = datetime.now(tz=timezone.utc).isoformat() | visit_date = datetime.now(tz=timezone.utc).isoformat() | ||||
visit_info = OriginVisitInfo( | visit_info = OriginVisitInfo( | ||||
Show All 27 Lines | sor = SaveOriginRequest.objects.create( | ||||
origin_url=_origin_url, | origin_url=_origin_url, | ||||
status=SAVE_REQUEST_ACCEPTED, | status=SAVE_REQUEST_ACCEPTED, | ||||
loading_task_id=_task_id, | loading_task_id=_task_id, | ||||
visit_date=None, | visit_date=None, | ||||
) | ) | ||||
sor.request_date = datetime.now(tz=timezone.utc) - timedelta(days=31) | sor.request_date = datetime.now(tz=timezone.utc) - timedelta(days=31) | ||||
sor.save() | sor.save() | ||||
_mock_scheduler(mocker, task_status="disabled", task_run_status="failed") | _fill_scheduler_db(swh_scheduler, task_status="disabled", task_run_status="failed") | ||||
sors = get_save_origin_requests(_visit_type, _origin_url) | sors = get_save_origin_requests(_visit_type, _origin_url) | ||||
assert len(sors) == 2 | assert len(sors) == 2 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | ||||
assert sors[0]["visit_date"] is None | assert sors[0]["visit_date"] is None | ||||
mock_get_origin_visits.assert_called_once() | mock_get_origin_visits.assert_called_once() | ||||
def _get_save_origin_requests( | def _get_save_origin_requests( | ||||
mocker, load_status, visit_status, request_date: Optional[datetime] = None | mocker, | ||||
swh_scheduler, | |||||
load_status, | |||||
visit_status, | |||||
request_date: Optional[datetime] = None, | |||||
): | ): | ||||
"""Wrapper around the get_origin_save_origin_request call. | """Wrapper around the get_origin_save_origin_request call. | ||||
""" | """ | ||||
SaveOriginRequest.objects.create( | SaveOriginRequest.objects.create( | ||||
request_date=datetime.now(tz=timezone.utc), | request_date=datetime.now(tz=timezone.utc), | ||||
visit_type=_visit_type, | visit_type=_visit_type, | ||||
visit_status=visit_status, | visit_status=visit_status, | ||||
origin_url=_origin_url, | origin_url=_origin_url, | ||||
status=SAVE_REQUEST_ACCEPTED, | status=SAVE_REQUEST_ACCEPTED, | ||||
visit_date=None, | visit_date=None, | ||||
loading_task_id=_task_id, | loading_task_id=_task_id, | ||||
) | ) | ||||
# mock scheduler and archives | # mock scheduler and archives | ||||
_mock_scheduler( | _fill_scheduler_db( | ||||
mocker, task_status="next_run_scheduled", task_run_status=load_status | swh_scheduler, task_status="next_run_scheduled", task_run_status=load_status | ||||
) | ) | ||||
mock_archive = mocker.patch("swh.web.common.origin_save.archive") | mock_archive = mocker.patch("swh.web.common.origin_save.archive") | ||||
mock_archive.lookup_origin.return_value = {"url": _origin_url} | mock_archive.lookup_origin.return_value = {"url": _origin_url} | ||||
mock_get_origin_visits = mocker.patch( | mock_get_origin_visits = mocker.patch( | ||||
"swh.web.common.origin_save.get_origin_visits" | "swh.web.common.origin_save.get_origin_visits" | ||||
) | ) | ||||
# create a visit for the save request with status created | # create a visit for the save request with status created | ||||
visit_date = datetime.now(tz=timezone.utc).isoformat() | visit_date = datetime.now(tz=timezone.utc).isoformat() | ||||
▲ Show 20 Lines • Show All 157 Lines • ▼ Show 20 Lines | def test_origin_exists_200_with_data_unexpected_date_format(requests_mock): | ||||
# so the resulting date is None | # so the resulting date is None | ||||
assert actual_result == OriginExistenceCheckInfo( | assert actual_result == OriginExistenceCheckInfo( | ||||
origin_url=url, exists=True, content_length=None, last_modified=None, | origin_url=url, exists=True, content_length=None, last_modified=None, | ||||
) | ) | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
@pytest.mark.parametrize("visit_status", [VISIT_STATUS_CREATED, VISIT_STATUS_ONGOING,]) | @pytest.mark.parametrize("visit_status", [VISIT_STATUS_CREATED, VISIT_STATUS_ONGOING,]) | ||||
def test_get_save_origin_requests_no_visit_date_found(mocker, visit_status): | def test_get_save_origin_requests_no_visit_date_found( | ||||
mocker, swh_scheduler, visit_status | |||||
): | |||||
"""Uneventful visits with failed visit status are marked as failed | """Uneventful visits with failed visit status are marked as failed | ||||
""" | """ | ||||
sors = _get_save_origin_requests( | sors = _get_save_origin_requests( | ||||
mocker, load_status="scheduled", visit_status=visit_status, | mocker, swh_scheduler, load_status="scheduled", visit_status=visit_status, | ||||
) | ) | ||||
# check no visit date has been found | # check no visit date has been found | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_RUNNING | assert sors[0]["save_task_status"] == SAVE_TASK_RUNNING | ||||
assert sors[0]["visit_date"] is not None | assert sors[0]["visit_date"] is not None | ||||
assert sors[0]["visit_status"] == visit_status | assert sors[0]["visit_status"] == visit_status | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
@pytest.mark.parametrize("visit_status", ["not_found", "failed",]) | @pytest.mark.parametrize("visit_status", ["not_found", "failed",]) | ||||
def test_get_save_origin_requests_no_failed_status_override(mocker, visit_status): | def test_get_save_origin_requests_no_failed_status_override( | ||||
mocker, swh_scheduler, visit_status | |||||
): | |||||
"""Uneventful visits with failed statuses (failed, not found) are marked as failed | """Uneventful visits with failed statuses (failed, not found) are marked as failed | ||||
""" | """ | ||||
sors = _get_save_origin_requests( | sors = _get_save_origin_requests( | ||||
mocker, load_status="uneventful", visit_status=visit_status | mocker, swh_scheduler, load_status="uneventful", visit_status=visit_status | ||||
) | ) | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | ||||
visit_date = sors[0]["visit_date"] | visit_date = sors[0]["visit_date"] | ||||
assert visit_date is not None | assert visit_date is not None | ||||
sors = get_save_origin_requests(_visit_type, _origin_url) | sors = get_save_origin_requests(_visit_type, _origin_url) | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | ||||
assert sors[0]["visit_status"] == visit_status | assert sors[0]["visit_status"] == visit_status | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
@pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
"load_status,visit_status", | "load_status,visit_status", | ||||
[ | [ | ||||
("eventful", VISIT_STATUS_FULL), | ("eventful", VISIT_STATUS_FULL), | ||||
("eventful", VISIT_STATUS_PARTIAL), | ("eventful", VISIT_STATUS_PARTIAL), | ||||
("uneventful", VISIT_STATUS_PARTIAL), | ("uneventful", VISIT_STATUS_PARTIAL), | ||||
], | ], | ||||
) | ) | ||||
def test_get_visit_info_for_save_request_succeeded(mocker, load_status, visit_status): | def test_get_visit_info_for_save_request_succeeded( | ||||
mocker, swh_scheduler, load_status, visit_status | |||||
): | |||||
"""Nominal scenario, below 30 days, returns something""" | """Nominal scenario, below 30 days, returns something""" | ||||
sors = _get_save_origin_requests( | sors = _get_save_origin_requests( | ||||
mocker, load_status=load_status, visit_status=visit_status | mocker, swh_scheduler, load_status=load_status, visit_status=visit_status | ||||
) | ) | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | ||||
assert sors[0]["visit_date"] is not None | assert sors[0]["visit_date"] is not None | ||||
assert sors[0]["visit_status"] == visit_status | assert sors[0]["visit_status"] == visit_status | ||||
sors = get_save_origin_requests(_visit_type, _origin_url) | sors = get_save_origin_requests(_visit_type, _origin_url) | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | ||||
assert sors[0]["visit_status"] == visit_status | assert sors[0]["visit_status"] == visit_status | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
@pytest.mark.parametrize("load_status", ["eventful", "uneventful",]) | @pytest.mark.parametrize("load_status", ["eventful", "uneventful",]) | ||||
def test_get_visit_info_incomplete_visit_still_successful(mocker, load_status): | def test_get_visit_info_incomplete_visit_still_successful( | ||||
mocker, swh_scheduler, load_status | |||||
): | |||||
"""Incomplete visit information, yet the task is updated partially | """Incomplete visit information, yet the task is updated partially | ||||
""" | """ | ||||
sors = _get_save_origin_requests( | sors = _get_save_origin_requests( | ||||
mocker, load_status=load_status, visit_status=None, | mocker, swh_scheduler, load_status=load_status, visit_status=None, | ||||
) | ) | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | ||||
# As the entry is missing the following information though | # As the entry is missing the following information though | ||||
assert sors[0]["visit_date"] is not None | assert sors[0]["visit_date"] is not None | ||||
assert sors[0]["visit_status"] is None | assert sors[0]["visit_status"] is None | ||||
# It's still detected as to be updated by the refresh routine | # It's still detected as to be updated by the refresh routine | ||||
sors = refresh_save_origin_request_statuses() | sors = refresh_save_origin_request_statuses() | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | ||||
assert sors[0]["visit_date"] is not None | assert sors[0]["visit_date"] is not None | ||||
assert sors[0]["visit_status"] is None | assert sors[0]["visit_status"] is None | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
def test_refresh_in_progress_save_request_statuses(mocker, api_client, archive_data): | def test_refresh_in_progress_save_request_statuses( | ||||
mocker, swh_scheduler, api_client, archive_data | |||||
): | |||||
"""Refresh a pending save origins requests and update if the status changes | """Refresh a pending save origins requests and update if the status changes | ||||
""" | """ | ||||
date_now = datetime.now(tz=timezone.utc) | date_now = datetime.now(tz=timezone.utc) | ||||
date_pivot = date_now - timedelta(days=30) | date_pivot = date_now - timedelta(days=30) | ||||
visit_started_date = date_now - timedelta(minutes=1) | visit_started_date = date_now - timedelta(minutes=1) | ||||
# returned visit status | # returned visit status | ||||
SaveOriginRequest.objects.create( | SaveOriginRequest.objects.create( | ||||
request_date=datetime.now(tz=timezone.utc), | request_date=datetime.now(tz=timezone.utc), | ||||
visit_type=_visit_type, | visit_type=_visit_type, | ||||
visit_status=VISIT_STATUS_CREATED, | visit_status=VISIT_STATUS_CREATED, | ||||
origin_url=_origin_url, | origin_url=_origin_url, | ||||
status=SAVE_REQUEST_ACCEPTED, | status=SAVE_REQUEST_ACCEPTED, | ||||
visit_date=None, | visit_date=None, | ||||
loading_task_id=_task_id, | loading_task_id=_task_id, | ||||
) | ) | ||||
# mock scheduler and archives | # mock scheduler and archives | ||||
_mock_scheduler( | _fill_scheduler_db( | ||||
mocker, task_status="next_run_scheduled", task_run_status=SAVE_TASK_SCHEDULED | swh_scheduler, | ||||
task_status="next_run_scheduled", | |||||
task_run_status=SAVE_TASK_SCHEDULED, | |||||
) | ) | ||||
mock_archive = mocker.patch("swh.web.common.origin_save.archive") | mock_archive = mocker.patch("swh.web.common.origin_save.archive") | ||||
mock_archive.lookup_origin.return_value = {"url": _origin_url} | mock_archive.lookup_origin.return_value = {"url": _origin_url} | ||||
mock_get_origin_visits = mocker.patch( | mock_get_origin_visits = mocker.patch( | ||||
"swh.web.common.origin_save.get_origin_visits" | "swh.web.common.origin_save.get_origin_visits" | ||||
) | ) | ||||
# create a visit for the save request with status created | # create a visit for the save request with status created | ||||
visit_date = datetime.now(tz=timezone.utc).isoformat() | visit_date = datetime.now(tz=timezone.utc).isoformat() | ||||
visit_info = OriginVisitInfo( | visit_info = OriginVisitInfo( | ||||
date=visit_date, | date=visit_date, | ||||
formatted_date="", | formatted_date="", | ||||
metadata={}, | metadata={}, | ||||
origin=_origin_url, | origin=_origin_url, | ||||
snapshot="", # make mypy happy | snapshot="", # make mypy happy | ||||
status=VISIT_STATUS_CREATED, | status=VISIT_STATUS_CREATED, | ||||
type=_visit_type, | type=_visit_type, | ||||
url="", | url="", | ||||
visit=34, | visit=34, | ||||
) | ) | ||||
mock_get_origin_visits.return_value = [visit_info] | mock_get_origin_visits.return_value = [visit_info] | ||||
# make the scheduler return a running event | # make the scheduler return a running event | ||||
_mock_scheduler( | _fill_scheduler_db( | ||||
mocker, | swh_scheduler, | ||||
task_status="next_run_scheduled", | task_status="next_run_scheduled", | ||||
task_run_status="started", | task_run_status="started", | ||||
visit_started_date=visit_started_date, | visit_started_date=visit_started_date, | ||||
) | ) | ||||
# The visit is detected but still running | # The visit is detected but still running | ||||
sors = refresh_save_origin_request_statuses() | sors = refresh_save_origin_request_statuses() | ||||
assert mock_get_origin_visits.called and mock_get_origin_visits.call_count == 1 | assert mock_get_origin_visits.called and mock_get_origin_visits.call_count == 1 | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
for sor in sors: | for sor in sors: | ||||
assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot | assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot | ||||
# The status is updated | # The status is updated | ||||
assert sor["save_task_status"] == SAVE_TASK_RUNNING | assert sor["save_task_status"] == SAVE_TASK_RUNNING | ||||
# but the following entries are missing so it's not updated | # but the following entries are missing so it's not updated | ||||
assert sor["visit_date"] is not None | assert sor["visit_date"] is not None | ||||
assert sor["visit_status"] == VISIT_STATUS_CREATED | assert sor["visit_status"] == VISIT_STATUS_CREATED | ||||
# make the visit status completed | # make the visit status completed | ||||
# make the scheduler return a running event | # make the scheduler return a running event | ||||
_mock_scheduler( | _fill_scheduler_db( | ||||
mocker, | swh_scheduler, | ||||
task_status="completed", | task_status="completed", | ||||
task_run_status="eventful", | task_run_status="eventful", | ||||
visit_started_date=visit_started_date, | visit_started_date=visit_started_date, | ||||
) | ) | ||||
# This time around, the origin returned will have all required information updated | # This time around, the origin returned will have all required information updated | ||||
# (visit date and visit status in final state) | # (visit date and visit status in final state) | ||||
visit_date = datetime.now(tz=timezone.utc).isoformat() | visit_date = datetime.now(tz=timezone.utc).isoformat() | ||||
Show All 15 Lines | for sor in sors: | ||||
assert sor["visit_status"] == VISIT_STATUS_FULL | assert sor["visit_status"] == VISIT_STATUS_FULL | ||||
# Once in final state, a sor should not be updated anymore | # Once in final state, a sor should not be updated anymore | ||||
sors = refresh_save_origin_request_statuses() | sors = refresh_save_origin_request_statuses() | ||||
assert len(sors) == 0 | assert len(sors) == 0 | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
def test_refresh_save_request_statuses(mocker, api_client, archive_data): | def test_refresh_save_request_statuses(mocker, swh_scheduler, api_client, archive_data): | ||||
"""Refresh filters save origins requests and update if changes | """Refresh filters save origins requests and update if changes | ||||
""" | """ | ||||
date_now = datetime.now(tz=timezone.utc) | date_now = datetime.now(tz=timezone.utc) | ||||
date_pivot = date_now - timedelta(days=30) | date_pivot = date_now - timedelta(days=30) | ||||
# returned visit status | # returned visit status | ||||
SaveOriginRequest.objects.create( | SaveOriginRequest.objects.create( | ||||
request_date=datetime.now(tz=timezone.utc), | request_date=datetime.now(tz=timezone.utc), | ||||
visit_type=_visit_type, | visit_type=_visit_type, | ||||
visit_status=None, | visit_status=None, | ||||
origin_url=_origin_url, | origin_url=_origin_url, | ||||
status=SAVE_REQUEST_ACCEPTED, | status=SAVE_REQUEST_ACCEPTED, | ||||
visit_date=None, | visit_date=None, | ||||
loading_task_id=_task_id, | loading_task_id=_task_id, | ||||
) | ) | ||||
# mock scheduler and archives | # mock scheduler and archives | ||||
_mock_scheduler( | _fill_scheduler_db( | ||||
mocker, task_status="next_run_scheduled", task_run_status=SAVE_TASK_SCHEDULED | swh_scheduler, | ||||
task_status="next_run_scheduled", | |||||
task_run_status=SAVE_TASK_SCHEDULED, | |||||
) | ) | ||||
mock_archive = mocker.patch("swh.web.common.origin_save.archive") | mock_archive = mocker.patch("swh.web.common.origin_save.archive") | ||||
mock_archive.lookup_origin.return_value = {"url": _origin_url} | mock_archive.lookup_origin.return_value = {"url": _origin_url} | ||||
mock_get_origin_visits = mocker.patch( | mock_get_origin_visits = mocker.patch( | ||||
"swh.web.common.origin_save.get_origin_visits" | "swh.web.common.origin_save.get_origin_visits" | ||||
) | ) | ||||
# create a visit for the save request with status created | # create a visit for the save request with status created | ||||
visit_date = datetime.now(tz=timezone.utc).isoformat() | visit_date = datetime.now(tz=timezone.utc).isoformat() | ||||
▲ Show 20 Lines • Show All 71 Lines • Show Last 20 Lines |