Changeset View
Standalone View
swh/web/tests/common/test_origin_save.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timedelta, timezone | from datetime import datetime, timedelta, timezone | ||||
from functools import partial | from functools import partial | ||||
import re | import re | ||||
from typing import Optional | |||||
import pytest | import pytest | ||||
import requests | import requests | ||||
from swh.core.pytest_plugin import get_response_cb | from swh.core.pytest_plugin import get_response_cb | ||||
from swh.web.common.models import ( | from swh.web.common.models import ( | ||||
SAVE_REQUEST_ACCEPTED, | SAVE_REQUEST_ACCEPTED, | ||||
SAVE_TASK_FAILED, | SAVE_TASK_FAILED, | ||||
▲ Show 20 Lines • Show All 207 Lines • ▼ Show 20 Lines | def test_get_save_origin_requests_find_visit_date(mocker): | ||||
sors = get_save_origin_requests(_visit_type, _origin_url) | sors = get_save_origin_requests(_visit_type, _origin_url) | ||||
assert len(sors) == 2 | assert len(sors) == 2 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | ||||
assert sors[0]["visit_date"] is None | assert sors[0]["visit_date"] is None | ||||
mock_get_origin_visits.assert_called_once() | mock_get_origin_visits.assert_called_once() | ||||
def _get_save_origin_requests(mocker, load_status, visit_status): | def _get_save_origin_requests( | ||||
# create a save request | mocker, load_status, visit_status, request_date: Optional[datetime] = None | ||||
): | |||||
"""Wrapper around the get_origin_save_origin_request call. | |||||
""" | |||||
SaveOriginRequest.objects.create( | SaveOriginRequest.objects.create( | ||||
request_date=datetime.now(tz=timezone.utc), | request_date=datetime.now(tz=timezone.utc), | ||||
ardumont: note that this is confusing when one wants to override the date [1]
The model is already auto… | |||||
Not Done Inline ActionsRight, I guess you can remove that line in a separate commit. anlambert: Right, I guess you can remove that line in a separate commit. | |||||
visit_type=_visit_type, | visit_type=_visit_type, | ||||
origin_url=_origin_url, | origin_url=_origin_url, | ||||
status=SAVE_REQUEST_ACCEPTED, | status=SAVE_REQUEST_ACCEPTED, | ||||
visit_date=None, | visit_date=None, | ||||
loading_task_id=_task_id, | loading_task_id=_task_id, | ||||
) | ) | ||||
# mock scheduler and archives | # mock scheduler and archives | ||||
_mock_scheduler( | _mock_scheduler( | ||||
mocker, task_status="next_run_scheduled", task_run_status=load_status | mocker, task_status="next_run_scheduled", task_run_status=load_status | ||||
) | ) | ||||
mock_archive = mocker.patch("swh.web.common.origin_save.archive") | mock_archive = mocker.patch("swh.web.common.origin_save.archive") | ||||
mock_archive.lookup_origin.return_value = {"url": _origin_url} | mock_archive.lookup_origin.return_value = {"url": _origin_url} | ||||
mock_get_origin_visits = mocker.patch( | mock_get_origin_visits = mocker.patch( | ||||
"swh.web.common.origin_save.get_origin_visits" | "swh.web.common.origin_save.get_origin_visits" | ||||
) | ) | ||||
# create a visit for the save request with status created | # create a visit for the save request with status created | ||||
visit_date = datetime.now(tz=timezone.utc).isoformat() | visit_date = datetime.now(tz=timezone.utc).isoformat() | ||||
visit_info = OriginVisitInfo( | visit_info = OriginVisitInfo( | ||||
date=visit_date, | date=visit_date, | ||||
formatted_date="", | formatted_date="", | ||||
metadata={}, | metadata={}, | ||||
origin=_origin_url, | origin=_origin_url, | ||||
snapshot=None, | snapshot="", # make mypy happy | ||||
Not Done Inline ActionsI don't think this deserves a comment vlorentz: I don't think this deserves a comment | |||||
Done Inline Actionsi'm pretty sure you might have asked me why the change without the comment ;) ardumont: i'm pretty sure you might have asked me why the change without the comment ;) | |||||
Not Done Inline Actionsagreed anlambert: agreed | |||||
status=visit_status, | status=visit_status, | ||||
type=_visit_type, | type=_visit_type, | ||||
url="", | url="", | ||||
visit=34, | visit=34, | ||||
) | ) | ||||
mock_get_origin_visits.return_value = [visit_info] | mock_get_origin_visits.return_value = [visit_info] | ||||
sors = get_save_origin_requests(_visit_type, _origin_url) | sors = get_save_origin_requests(_visit_type, _origin_url) | ||||
mock_get_origin_visits.assert_called_once() | mock_get_origin_visits.assert_called_once() | ||||
return sors | return sors | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
def test_get_save_origin_requests_no_visit_date_found(mocker): | @pytest.mark.parametrize("visit_status", ["created", "ongoing",]) | ||||
def test_get_save_origin_requests_no_visit_date_found(mocker, visit_status): | |||||
"""Uneventful visits with failed visit status are marked as failed | |||||
""" | |||||
sors = _get_save_origin_requests( | sors = _get_save_origin_requests( | ||||
mocker, load_status="scheduled", visit_status="created" | mocker, load_status="scheduled", visit_status=visit_status, | ||||
) | ) | ||||
# check no visit date has been found | # check no visit date has been found | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_RUNNING | assert sors[0]["save_task_status"] == SAVE_TASK_RUNNING | ||||
assert sors[0]["visit_date"] is None | assert sors[0]["visit_date"] is None | ||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
def test_get_save_origin_requests_failed_when_not_found(mocker): | @pytest.mark.parametrize("visit_status", ["not_found", "failed",]) | ||||
def test_get_save_origin_requests_no_failed_status_override(mocker, visit_status): | |||||
"""Uneventful visits with failed statuses (failed, not found) are marked as failed | |||||
""" | |||||
sors = _get_save_origin_requests( | sors = _get_save_origin_requests( | ||||
mocker, load_status="uneventful", visit_status="not_found" | mocker, load_status="uneventful", visit_status=visit_status | ||||
) | ) | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | |||||
visit_date = sors[0]["visit_date"] | |||||
if visit_status == "failed": | |||||
Done Inline Actionsbecause that's what the code is doing, it overwrite the visit_date to none for that case. ardumont: because that's what the code is doing, it overwrite the visit_date to none for that case. | |||||
Not Done Inline ActionsIndeed, and it should not do it, that's a mistake in the save code now code. As a visit has been made, the date should not be discarded. To investigate later. anlambert: Indeed, and it should not do it, that's a mistake in the save code now code.
As a visit has… | |||||
assert visit_date is None | |||||
else: | |||||
assert visit_date is not None | |||||
sors = get_save_origin_requests(_visit_type, _origin_url) | |||||
assert len(sors) == 1 | |||||
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | ||||
assert sors[0]["visit_date"] is not None | |||||
@pytest.mark.django_db | @pytest.mark.django_db | ||||
def test_get_save_origin_requests_no_failed_status_override(mocker): | @pytest.mark.parametrize( | ||||
"load_status,visit_status", | |||||
[("eventful", "full"), ("eventful", "partial"), ("uneventful", "partial"),], | |||||
) | |||||
def test_get_visit_info_for_save_request_succeeded(mocker, load_status, visit_status): | |||||
"""Nominal scenario, below 30 days, returns something""" | |||||
sors = _get_save_origin_requests( | sors = _get_save_origin_requests( | ||||
mocker, load_status="uneventful", visit_status="not_found" | mocker, load_status=load_status, visit_status=visit_status | ||||
) | ) | ||||
assert len(sors) == 1 | assert len(sors) == 1 | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | |||||
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | |||||
assert sors[0]["visit_date"] is not None | assert sors[0]["visit_date"] is not None | ||||
sors = get_save_origin_requests(_visit_type, _origin_url) | sors = get_save_origin_requests(_visit_type, _origin_url) | ||||
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED | assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | ||||
@pytest.mark.django_db | |||||
@pytest.mark.parametrize("load_status", ["eventful", "uneventful",]) | |||||
def test_get_visit_info_incomplete_visit_still_successful(mocker, load_status): | |||||
Done Inline ActionsThat test is confusing but so is the current behavior so status quo ;) ardumont: That test is confusing but so is the current behavior so status quo ;) | |||||
Not Done Inline ActionsBecause the behavior prior exploiting visit statuses was to check scheduler task statuses (because only that info was available at the time). I guess we could mark a save request as succeeded only if a visit status has been found now (in another diff). anlambert: Because the behavior prior exploiting visit statuses was to check scheduler task statuses… | |||||
Done Inline Actionsyes, quite. I planned to update the behavior and modify some more those tests accordingly ardumont: yes, quite.
I planned to update the behavior and modify some more those tests accordingly
to… | |||||
"""Incomplete visit information, yet the task is considered ok | |||||
""" | |||||
sors = _get_save_origin_requests( | |||||
mocker, load_status=load_status, visit_status=None, | |||||
) | |||||
assert len(sors) == 1 | |||||
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED | |||||
assert sors[0]["visit_date"] is None |
note that this is confusing when one wants to override the date [1]
The model is already auto adding the date.
That means we can remove that line and let the default django behavior occur ;)
[1] I tried locally at some point to try and push some date beyong the threshold. I did
not push it because I still failed to reproduce the issue in tests.