Page MenuHomeSoftware Heritage

D6170.diff
No OneTemporary

D6170.diff

diff --git a/assets/src/bundles/save/index.js b/assets/src/bundles/save/index.js
--- a/assets/src/bundles/save/index.js
+++ b/assets/src/bundles/save/index.js
@@ -115,16 +115,10 @@
export function initOriginSave() {
- $(document).ready(async() => {
+ $(document).ready(() => {
$.fn.dataTable.ext.errMode = 'none';
- const response = await fetch(Urls.origin_save_types_list());
- const data = await response.json();
-
- for (const originType of data) {
- $('#swh-input-visit-type').append(`<option value="${originType}">${originType}</option>`);
- }
// set git as the default value as before
$('#swh-input-visit-type').val('git');
diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py
--- a/swh/web/common/origin_save.py
+++ b/swh/web/common/origin_save.py
@@ -5,6 +5,7 @@
from bisect import bisect_right
from datetime import datetime, timedelta, timezone
+from functools import lru_cache
from itertools import product
import json
import logging
@@ -20,7 +21,6 @@
from django.utils.html import escape
from swh.scheduler.utils import create_oneshot_task_dict
-from swh.web import config
from swh.web.common import archive
from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc
from swh.web.common.models import (
@@ -46,8 +46,7 @@
SaveOriginRequestInfo,
)
from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc
-
-scheduler = config.scheduler()
+from swh.web.config import get_config, scheduler
logger = logging.getLogger(__name__)
@@ -149,6 +148,12 @@
}
+@lru_cache()
+def get_scheduler_load_task_types() -> List[str]:
+ task_types = scheduler().get_task_types()
+ return [t["type"] for t in task_types if t["type"].startswith("load")]
+
+
def get_savable_visit_types_dict(privileged_user: bool = False) -> Dict:
"""Returned the supported task types the user has access to.
@@ -165,7 +170,12 @@
else:
task_types = _visit_type_task
- return task_types
+ # scheduler is not available when running cypress tests
+ if get_config().get("e2e_tests_mode"):
+ return task_types
+ else:
+ load_task_types = get_scheduler_load_task_types()
+ return {k: v for k, v in task_types.items() if v in load_task_types}
def get_savable_visit_types(privileged_user: bool = False) -> List[str]:
@@ -490,9 +500,9 @@
# a task has already been created to load the origin
elif sor.loading_task_id != -1:
# get the scheduler task and its status
- tasks = scheduler.get_tasks([sor.loading_task_id])
+ tasks = scheduler().get_tasks([sor.loading_task_id])
task = tasks[0] if tasks else None
- task_runs = scheduler.get_task_runs([sor.loading_task_id])
+ task_runs = scheduler().get_task_runs([sor.loading_task_id])
task_run = task_runs[0] if task_runs else None
save_request_info = _update_save_request_info(sor, task, task_run)
task_status = save_request_info["save_task_status"]
@@ -513,7 +523,7 @@
visit_type_tasks[visit_type], **task_kwargs
)
- task = scheduler.create_tasks([task_dict])[0]
+ task = scheduler().create_tasks([task_dict])[0]
# pending save request has been accepted
if sor:
@@ -592,9 +602,9 @@
task_ids.append(sor.loading_task_id)
save_requests = []
if task_ids:
- tasks = scheduler.get_tasks(task_ids)
+ tasks = scheduler().get_tasks(task_ids)
tasks = {task["id"]: task for task in tasks}
- task_runs = scheduler.get_task_runs(tasks)
+ task_runs = scheduler().get_task_runs(tasks)
task_runs = {task_run["task"]: task_run for task_run in task_runs}
for sor in requests_queryset:
sr_dict = _update_save_request_info(
@@ -707,12 +717,12 @@
except ObjectDoesNotExist:
return {}
- task = scheduler.get_tasks([save_request.loading_task_id])
+ task = scheduler().get_tasks([save_request.loading_task_id])
task = task[0] if task else None
if task is None:
return {}
- task_run = scheduler.get_task_runs([task["id"]])
+ task_run = scheduler().get_task_runs([task["id"]])
task_run = task_run[0] if task_run else None
if task_run is None:
return {}
@@ -724,7 +734,7 @@
# Enrich the task run with the loading visit status
task_run["visit_status"] = save_request.visit_status
- es_workers_index_url = config.get_config()["es_workers_index_url"]
+ es_workers_index_url = get_config()["es_workers_index_url"]
if not es_workers_index_url:
return task_run
es_workers_index_url += "/_search"
diff --git a/swh/web/misc/origin_save.py b/swh/web/misc/origin_save.py
--- a/swh/web/misc/origin_save.py
+++ b/swh/web/misc/origin_save.py
@@ -21,18 +21,13 @@
return render(
request,
"misc/origin-save.html",
- {"heading": ("Request the saving of a software origin into the archive")},
+ {
+ "heading": ("Request the saving of a software origin into the archive"),
+ "visit_types": get_savable_visit_types(privileged_user(request)),
+ },
)
-def _visit_save_types_list(request) -> JsonResponse:
- """Return the list of supported visit types as json response
-
- """
- visit_types = get_savable_visit_types(privileged_user(request))
- return JsonResponse(visit_types, safe=False)
-
-
def _origin_save_requests_list(request, status):
if status != "all":
@@ -89,7 +84,6 @@
urlpatterns = [
url(r"^save/$", _origin_save_view, name="origin-save"),
- url(r"^save/types/list/$", _visit_save_types_list, name="origin-save-types-list"),
url(
r"^save/requests/list/(?P<status>.+)/$",
_origin_save_requests_list,
diff --git a/swh/web/templates/misc/origin-save.html b/swh/web/templates/misc/origin-save.html
--- a/swh/web/templates/misc/origin-save.html
+++ b/swh/web/templates/misc/origin-save.html
@@ -34,7 +34,9 @@
<div class="form-group col-md-2">
<label for="swh-input-visit-type">Origin type</label>
<select id="swh-input-visit-type" class="form-control" required onchange="swh.save.maybeRequireExtraInputs();">
- {% comment %} <option selected value="">Choose...</option> {% endcomment %}
+ {% for visit_type in visit_types %}
+ <option value="{{ visit_type }}">{{ visit_type }}</option>
+ {% endfor %}
</select>
<div class="invalid-feedback">The origin type must be specified</div>
</div>
diff --git a/swh/web/tests/admin/test_origin_save.py b/swh/web/tests/admin/test_origin_save.py
--- a/swh/web/tests/admin/test_origin_save.py
+++ b/swh/web/tests/admin/test_origin_save.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2019 The Software Heritage developers
+# Copyright (C) 2015-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -120,8 +120,8 @@
assert can_save_origin(_unauthorized_origin_url) == SAVE_REQUEST_PENDING
-def test_accept_pending_save_request(client, mocker):
- mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
+def test_accept_pending_save_request(client, swh_scheduler):
+
visit_type = "git"
origin_url = "https://v2.pikacode.com/bthate/botlib.git"
save_request_url = reverse(
@@ -138,20 +138,6 @@
check_not_login(client, accept_request_url)
- tasks_data = [
- {
- "priority": "high",
- "policy": "oneshot",
- "type": "load-git",
- "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
- "status": "next_run_not_scheduled",
- "id": 1,
- }
- ]
-
- mock_scheduler.create_tasks.return_value = tasks_data
- mock_scheduler.get_tasks.return_value = tasks_data
-
client.login(username=_user_name, password=_user_password)
response = check_http_post_response(client, accept_request_url, status_code=200)
@@ -160,8 +146,8 @@
assert response.data[0]["save_task_status"] == SAVE_TASK_NOT_YET_SCHEDULED
-def test_reject_pending_save_request(client, mocker):
- mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
+def test_reject_pending_save_request(client, swh_scheduler):
+
visit_type = "git"
origin_url = "https://wikipedia.com"
@@ -183,20 +169,6 @@
client.login(username=_user_name, password=_user_password)
response = check_http_post_response(client, reject_request_url, status_code=200)
- tasks_data = [
- {
- "priority": "high",
- "policy": "oneshot",
- "type": "load-git",
- "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
- "status": "next_run_not_scheduled",
- "id": 1,
- }
- ]
-
- mock_scheduler.create_tasks.return_value = tasks_data
- mock_scheduler.get_tasks.return_value = tasks_data
-
response = check_http_get_response(client, save_request_url, status_code=200)
assert response.data[0]["save_request_status"] == SAVE_REQUEST_REJECTED
diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py
--- a/swh/web/tests/api/views/test_origin_save.py
+++ b/swh/web/tests/api/views/test_origin_save.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from datetime import datetime, timedelta
+import uuid
import pytest
@@ -47,7 +48,7 @@
SaveUnauthorizedOrigin.objects.create(url="https://gitlab.com/user_to_exclude")
-def test_invalid_visit_type(api_client):
+def test_invalid_visit_type(api_client, swh_scheduler):
url = reverse(
"api-1-save-origin",
url_args={
@@ -58,7 +59,7 @@
check_api_get_responses(api_client, url, status_code=400)
-def test_invalid_origin_url(api_client):
+def test_invalid_origin_url(api_client, swh_scheduler):
url = reverse(
"api-1-save-origin", url_args={"visit_type": "git", "origin_url": "bar"}
)
@@ -70,59 +71,15 @@
mocker,
origin_url,
expected_request_status,
- scheduler_task_status=None,
- scheduler_task_run_status=None,
expected_task_status=None,
visit_date=None,
):
- mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
mock_origin_exists = mocker.patch("swh.web.common.origin_save.origin_exists")
mock_origin_exists.return_value = OriginExistenceCheckInfo(
origin_url=origin_url, exists=True, last_modified=None, content_length=None
)
- if scheduler_task_status is None:
- mock_scheduler.get_tasks.return_value = []
- else:
- mock_scheduler.get_tasks.return_value = [
- {
- "priority": "high",
- "policy": "oneshot",
- "type": "load-git",
- "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
- "status": scheduler_task_status,
- "id": 1,
- }
- ]
-
- if scheduler_task_run_status is None:
- mock_scheduler.get_task_runs.return_value = []
- else:
- mock_scheduler.get_task_runs.return_value = [
- {
- "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205",
- "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5),
- "id": 1,
- "metadata": {},
- "scheduled": datetime.now(tz=timezone.utc),
- "started": None,
- "status": scheduler_task_run_status,
- "task": 1,
- }
- ]
-
- mock_scheduler.create_tasks.return_value = [
- {
- "priority": "high",
- "policy": "oneshot",
- "type": "load-git",
- "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
- "status": "next_run_not_scheduled",
- "id": 1,
- }
- ]
-
url = reverse(
"api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}
)
@@ -143,6 +100,7 @@
def check_save_request_status(
api_client,
mocker,
+ swh_scheduler,
origin_url,
expected_request_status,
expected_task_status,
@@ -151,33 +109,19 @@
visit_date=None,
visit_status=None,
):
- mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
- mock_scheduler.get_tasks.return_value = [
- {
- "priority": "high",
- "policy": "oneshot",
- "type": "load-git",
- "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
- "status": scheduler_task_status,
- "id": 1,
- }
- ]
- if scheduler_task_run_status is None:
- mock_scheduler.get_task_runs.return_value = []
- else:
- mock_scheduler.get_task_runs.return_value = [
- {
- "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205",
- "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5),
- "id": 1,
- "metadata": {},
- "scheduled": datetime.now(tz=timezone.utc),
- "started": None,
- "status": scheduler_task_run_status,
- "task": 1,
- }
- ]
+ if expected_task_status != SAVE_TASK_NOT_CREATED:
+ task = dict(swh_scheduler.search_tasks()[0].items())
+ backend_id = str(uuid.uuid4())
+
+ if scheduler_task_status != "next_run_not_scheduled":
+ swh_scheduler.schedule_task_run(task["id"], backend_id)
+
+ if scheduler_task_run_status is not None:
+ swh_scheduler.start_task_run(backend_id)
+ task_run = dict(
+ swh_scheduler.end_task_run(backend_id, scheduler_task_run_status).items()
+ )
url = reverse(
"api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}
@@ -194,16 +138,19 @@
assert save_request_data["save_task_status"] == expected_task_status
assert save_request_data["visit_status"] == visit_status
- # Check that save task status is still available when
- # the scheduler task has been archived
- mock_scheduler.get_tasks.return_value = []
- response = check_api_get_responses(api_client, url, status_code=200)
- save_request_data = response.data[0]
- assert save_request_data["save_task_status"] == expected_task_status
- assert save_request_data["visit_status"] == visit_status
+ if scheduler_task_run_status is not None:
+ # Check that save task status is still available when
+ # the scheduler task has been archived
+ swh_scheduler.delete_archived_tasks(
+ [{"task_id": task["id"], "task_run_id": task_run["id"]}]
+ )
+ response = check_api_get_responses(api_client, url, status_code=200)
+ save_request_data = response.data[0]
+ assert save_request_data["save_task_status"] == expected_task_status
+ assert save_request_data["visit_status"] == visit_status
-def test_save_request_rejected(api_client, mocker):
+def test_save_request_rejected(api_client, mocker, swh_scheduler):
origin_url = "https://github.com/user/illegal_repo"
check_created_save_request_status(
api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_REJECTED,
@@ -211,13 +158,14 @@
check_save_request_status(
api_client,
mocker,
+ swh_scheduler,
origin_url,
expected_request_status=SAVE_REQUEST_REJECTED,
expected_task_status=SAVE_TASK_NOT_CREATED,
)
-def test_save_request_pending(api_client, mocker):
+def test_save_request_pending(api_client, mocker, swh_scheduler):
origin_url = "https://unkwownforge.com/user/repo"
check_created_save_request_status(
api_client,
@@ -229,13 +177,14 @@
check_save_request_status(
api_client,
mocker,
+ swh_scheduler,
origin_url,
expected_request_status=SAVE_REQUEST_PENDING,
expected_task_status=SAVE_TASK_NOT_CREATED,
)
-def test_save_request_succeed(api_client, mocker):
+def test_save_request_scheduled(api_client, mocker, swh_scheduler):
origin_url = "https://github.com/Kitware/CMake"
check_created_save_request_status(
api_client,
@@ -247,15 +196,28 @@
check_save_request_status(
api_client,
mocker,
+ swh_scheduler,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SCHEDULED,
scheduler_task_status="next_run_scheduled",
scheduler_task_run_status="scheduled",
)
+
+
+def test_save_request_completed(api_client, mocker, swh_scheduler):
+ origin_url = "https://github.com/Kitware/CMake"
+ check_created_save_request_status(
+ api_client,
+ mocker,
+ origin_url,
+ expected_request_status=SAVE_REQUEST_ACCEPTED,
+ expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
+ )
check_save_request_status(
api_client,
mocker,
+ swh_scheduler,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SUCCEEDED,
@@ -263,10 +225,22 @@
scheduler_task_run_status="eventful",
visit_date=None,
)
+
+
+def test_save_request_completed_visit_status(api_client, mocker, swh_scheduler):
+ origin_url = "https://github.com/Kitware/CMake"
+ check_created_save_request_status(
+ api_client,
+ mocker,
+ origin_url,
+ expected_request_status=SAVE_REQUEST_ACCEPTED,
+ expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
+ )
visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1)
check_save_request_status(
api_client,
mocker,
+ swh_scheduler,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SUCCEEDED,
@@ -277,7 +251,7 @@
)
-def test_save_request_failed(api_client, mocker):
+def test_save_request_failed(api_client, mocker, swh_scheduler):
origin_url = "https://gitlab.com/inkscape/inkscape"
check_created_save_request_status(
api_client,
@@ -289,15 +263,7 @@
check_save_request_status(
api_client,
mocker,
- origin_url,
- expected_request_status=SAVE_REQUEST_ACCEPTED,
- expected_task_status=SAVE_TASK_SCHEDULED,
- scheduler_task_status="next_run_scheduled",
- scheduler_task_run_status="scheduled",
- )
- check_save_request_status(
- api_client,
- mocker,
+ swh_scheduler,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_FAILED,
@@ -307,20 +273,13 @@
)
-def test_create_save_request_only_when_needed(api_client, mocker):
+def test_create_save_request_no_duplicate(api_client, mocker, swh_scheduler):
origin_url = "https://github.com/webpack/webpack"
- SaveOriginRequest.objects.create(
- visit_type="git",
- origin_url=origin_url,
- status=SAVE_REQUEST_ACCEPTED,
- loading_task_id=56,
- )
check_created_save_request_status(
api_client,
mocker,
origin_url,
- scheduler_task_status="next_run_not_scheduled",
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
)
@@ -330,51 +289,31 @@
)
assert len(sors) == 1
- check_created_save_request_status(
+ check_save_request_status(
api_client,
mocker,
+ swh_scheduler,
origin_url,
- scheduler_task_status="next_run_scheduled",
- scheduler_task_run_status="scheduled",
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SCHEDULED,
+ scheduler_task_status="next_run_scheduled",
+ scheduler_task_run_status="scheduled",
)
- sors = list(
- SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url)
- )
- assert len(sors) == 1
-
- visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1)
- check_created_save_request_status(
- api_client,
- mocker,
- origin_url,
- scheduler_task_status="completed",
- expected_request_status=SAVE_REQUEST_ACCEPTED,
- expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
- visit_date=visit_date,
- )
- sors = list(
- SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url)
- )
- # check_api_post_responses sends two POST requests to check YAML and JSON response
- assert len(sors) == 3
check_created_save_request_status(
api_client,
mocker,
origin_url,
- scheduler_task_status="disabled",
expected_request_status=SAVE_REQUEST_ACCEPTED,
- expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
+ expected_task_status=SAVE_TASK_SCHEDULED,
)
sors = list(
SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url)
)
- assert len(sors) == 5
+ assert len(sors) == 1
-def test_get_save_requests_unknown_origin(api_client):
+def test_get_save_requests_unknown_origin(api_client, swh_scheduler):
unknown_origin_url = "https://gitlab.com/foo/bar"
url = reverse(
"api-1-save-origin",
@@ -444,7 +383,7 @@
def test_create_save_request_pending_review_anonymous_user(
- api_client, origin_to_review
+ api_client, origin_to_review, swh_scheduler
):
url = reverse(
@@ -461,8 +400,9 @@
def test_create_save_request_archives_with_ambassador_user(
- api_client, origin_to_review, keycloak_oidc, mocker, requests_mock,
+ api_client, keycloak_oidc, requests_mock, swh_scheduler,
):
+ swh_scheduler.add_load_archive_task_type()
keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION]
oidc_profile = keycloak_oidc.login()
@@ -481,32 +421,6 @@
headers={"content-length": content_length, "last-modified": last_modified,},
)
- mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
- mock_scheduler.get_task_runs.return_value = []
- mock_scheduler.create_tasks.return_value = [
- {
- "id": 10,
- "priority": "high",
- "policy": "oneshot",
- "status": "next_run_not_scheduled",
- "type": "load-archive-files",
- "arguments": {
- "args": [],
- "kwargs": {
- "url": originUrl,
- "artifacts": [
- {
- "url": artifact_url,
- "version": artifact_version,
- "time": last_modified,
- "length": content_length,
- }
- ],
- },
- },
- },
- ]
-
url = reverse(
"api-1-save-origin",
url_args={"visit_type": "archives", "origin_url": originUrl,},
@@ -529,9 +443,11 @@
def test_create_save_request_archives_missing_artifacts_data(
- api_client, origin_to_review, keycloak_oidc, mocker, requests_mock,
+ api_client, keycloak_oidc, swh_scheduler
):
+ swh_scheduler.add_load_archive_task_type()
+
keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION]
oidc_profile = keycloak_oidc.login()
api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}")
@@ -556,7 +472,7 @@
def test_create_save_request_archives_accepted_ambassador_user(
- api_client, origin_to_review, keycloak_oidc, mocker
+ api_client, origin_to_review, keycloak_oidc, mocker, swh_scheduler
):
keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION]
@@ -574,7 +490,7 @@
assert SaveAuthorizedOrigin.objects.get(url=origin_to_review)
-def test_create_save_request_anonymous_user_no_user_id(api_client):
+def test_create_save_request_anonymous_user_no_user_id(api_client, swh_scheduler):
origin_url = "https://some.git.hosters/user/repo"
url = reverse(
"api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url},
@@ -588,7 +504,7 @@
def test_create_save_request_authenticated_user_id(
- api_client, origin_to_review, keycloak_oidc, mocker
+ api_client, keycloak_oidc, swh_scheduler
):
oidc_profile = keycloak_oidc.login()
api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}")
@@ -607,7 +523,9 @@
assert sor.user_ids == f'"{user_id}"'
-def test_create_pending_save_request_multiple_authenticated_users(api_client):
+def test_create_pending_save_request_multiple_authenticated_users(
+ api_client, swh_scheduler
+):
origin_url = "https://some.git.hosters/user/repo3"
first_user = User.objects.create_user(username="first_user", password="")
second_user = User.objects.create_user(username="second_user", password="")
diff --git a/swh/web/tests/browse/views/test_origin.py b/swh/web/tests/browse/views/test_origin.py
--- a/swh/web/tests/browse/views/test_origin.py
+++ b/swh/web/tests/browse/views/test_origin.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -67,7 +67,7 @@
@given(origin_with_multiple_visits())
-def test_origin_content_view(client, archive_data, origin):
+def test_origin_content_view(client, archive_data, swh_scheduler, origin):
origin_visits = archive_data.origin_visit_get(origin["url"])
def _get_archive_data(visit_idx):
@@ -157,7 +157,7 @@
@given(origin())
-def test_origin_root_directory_view(client, archive_data, origin):
+def test_origin_root_directory_view(client, archive_data, swh_scheduler, origin):
origin_visits = archive_data.origin_visit_get(origin["url"])
visit = origin_visits[-1]
@@ -273,7 +273,7 @@
@given(origin())
-def test_origin_sub_directory_view(client, archive_data, origin):
+def test_origin_sub_directory_view(client, archive_data, swh_scheduler, origin):
origin_visits = archive_data.origin_visit_get(origin["url"])
visit = origin_visits[-1]
diff --git a/swh/web/tests/browse/views/test_revision.py b/swh/web/tests/browse/views/test_revision.py
--- a/swh/web/tests/browse/views/test_revision.py
+++ b/swh/web/tests/browse/views/test_revision.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -34,7 +34,7 @@
@given(origin())
-def test_revision_origin_snapshot_browse(client, archive_data, origin):
+def test_revision_origin_snapshot_browse(client, archive_data, swh_scheduler, origin):
snapshot = archive_data.snapshot_get_latest(origin["url"])
revision = archive_data.snapshot_get_head(snapshot)
diff --git a/swh/web/tests/common/test_origin_save.py b/swh/web/tests/common/test_origin_save.py
--- a/swh/web/tests/common/test_origin_save.py
+++ b/swh/web/tests/common/test_origin_save.py
@@ -7,12 +7,14 @@
from functools import partial
import re
from typing import Optional
+import uuid
import iso8601
import pytest
import requests
from swh.core.pytest_plugin import get_response_cb
+from swh.scheduler.utils import create_oneshot_task_dict
from swh.web.common.exc import BadInputExc
from swh.web.common.models import (
SAVE_REQUEST_ACCEPTED,
@@ -49,7 +51,7 @@
_origin_url = "https://gitlab.com/inkscape/inkscape"
_visit_type = "git"
-_task_id = 203525448
+_task_id = 1
@pytest.fixture(autouse=True)
@@ -61,59 +63,37 @@
@pytest.mark.django_db
-def test_get_save_origin_archived_task_info(mocker):
- _get_save_origin_task_info_test(mocker, task_archived=True)
+def test_get_save_origin_archived_task_info(swh_scheduler):
+ _get_save_origin_task_info_test(swh_scheduler, task_archived=True)
@pytest.mark.django_db
-def test_get_save_origin_task_full_info_with_es(mocker):
- _get_save_origin_task_info_test(mocker, es_available=True)
+def test_get_save_origin_task_info_without_es(swh_scheduler):
+ _get_save_origin_task_info_test(swh_scheduler, es_available=False)
-@pytest.mark.django_db
-def test_get_save_origin_task_info_with_es(mocker):
- _get_save_origin_task_info_test(mocker, es_available=True, full_info=False)
-
-
-@pytest.mark.django_db
-def test_get_save_origin_task_info_without_es(mocker):
- _get_save_origin_task_info_test(mocker, es_available=False)
-
-
-def _mock_scheduler(
- mocker,
+def _fill_scheduler_db(
+ swh_scheduler,
task_status="completed",
task_run_status="eventful",
task_archived=False,
visit_started_date=None,
):
- mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
- task = {
- "arguments": {"args": [], "kwargs": {"repo_url": _origin_url},},
- "current_interval": timedelta(days=64),
- "id": _task_id,
- "next_run": datetime.now(tz=timezone.utc) + timedelta(days=64),
- "policy": "oneshot",
- "priority": "high",
- "retries_left": 0,
- "status": task_status,
- "type": "load-git",
- }
- mock_scheduler.get_tasks.return_value = [dict(task) if not task_archived else None]
-
- task_run = {
- "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205",
- "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5),
- "id": 654270631,
- "metadata": {},
- "scheduled": datetime.now(tz=timezone.utc),
- "started": visit_started_date,
- "status": task_run_status,
- "task": _task_id,
- }
- mock_scheduler.get_task_runs.return_value = [
- dict(task_run) if not task_archived else None
- ]
+ task = task_run = None
+ if not task_archived:
+ task = swh_scheduler.create_tasks(
+ [create_oneshot_task_dict("load-git", repo_url=_origin_url)]
+ )[0]
+ backend_id = str(uuid.uuid4())
+
+ if task_status != "next_run_not_scheduled":
+ swh_scheduler.schedule_task_run(task["id"], backend_id)
+
+ if task_run_status is not None:
+ swh_scheduler.start_task_run(backend_id)
+ task_run = dict(
+ swh_scheduler.end_task_run(backend_id, task_run_status).items()
+ )
return task, task_run
@@ -125,7 +105,9 @@
("archives", False), # when no privilege, this is rejected
],
)
-def test__check_visit_type_savable(wrong_type, privileged_user):
+def test_check_visit_type_savable(wrong_type, privileged_user, swh_scheduler):
+
+ swh_scheduler.add_load_archive_task_type()
with pytest.raises(BadInputExc, match="Allowed types"):
_check_visit_type_savable(wrong_type, privileged_user)
@@ -134,7 +116,10 @@
_check_visit_type_savable("archives", True)
-def test_get_savable_visit_types():
+def test_get_savable_visit_types(swh_scheduler):
+
+ swh_scheduler.add_load_archive_task_type()
+
default_list = list(_visit_type_task.keys())
assert set(get_savable_visit_types()) == set(default_list)
@@ -146,7 +131,7 @@
def _get_save_origin_task_info_test(
- mocker, task_archived=False, es_available=True, full_info=True
+ swh_scheduler, task_archived=False, es_available=True, full_info=True
):
swh_web_config = get_config()
@@ -164,7 +149,7 @@
loading_task_id=_task_id,
)
- task, task_run = _mock_scheduler(mocker, task_archived=task_archived)
+ task, task_run = _fill_scheduler_db(swh_scheduler, task_archived=task_archived)
es_response = requests.post("%s/_search" % _es_workers_index_url).json()
@@ -215,7 +200,7 @@
@pytest.mark.django_db
-def test_get_save_origin_requests_find_visit_date(mocker):
+def test_get_save_origin_requests_find_visit_date(mocker, swh_scheduler):
# create a save request
SaveOriginRequest.objects.create(
request_date=datetime.now(tz=timezone.utc),
@@ -227,7 +212,7 @@
)
# mock scheduler and archive
- _mock_scheduler(mocker)
+ _fill_scheduler_db(swh_scheduler)
mock_archive = mocker.patch("swh.web.common.origin_save.archive")
mock_archive.lookup_origin.return_value = {"url": _origin_url}
mock_get_origin_visits = mocker.patch(
@@ -271,7 +256,7 @@
sor.request_date = datetime.now(tz=timezone.utc) - timedelta(days=31)
sor.save()
- _mock_scheduler(mocker, task_status="disabled", task_run_status="failed")
+ _fill_scheduler_db(swh_scheduler, task_status="disabled", task_run_status="failed")
sors = get_save_origin_requests(_visit_type, _origin_url)
@@ -282,7 +267,11 @@
def _get_save_origin_requests(
- mocker, load_status, visit_status, request_date: Optional[datetime] = None
+ mocker,
+ swh_scheduler,
+ load_status,
+ visit_status,
+ request_date: Optional[datetime] = None,
):
"""Wrapper around the get_origin_save_origin_request call.
@@ -298,8 +287,8 @@
)
# mock scheduler and archives
- _mock_scheduler(
- mocker, task_status="next_run_scheduled", task_run_status=load_status
+ _fill_scheduler_db(
+ swh_scheduler, task_status="next_run_scheduled", task_run_status=load_status
)
mock_archive = mocker.patch("swh.web.common.origin_save.archive")
mock_archive.lookup_origin.return_value = {"url": _origin_url}
@@ -473,12 +462,14 @@
@pytest.mark.django_db
@pytest.mark.parametrize("visit_status", [VISIT_STATUS_CREATED, VISIT_STATUS_ONGOING,])
-def test_get_save_origin_requests_no_visit_date_found(mocker, visit_status):
+def test_get_save_origin_requests_no_visit_date_found(
+ mocker, swh_scheduler, visit_status
+):
"""Uneventful visits with failed visit status are marked as failed
"""
sors = _get_save_origin_requests(
- mocker, load_status="scheduled", visit_status=visit_status,
+ mocker, swh_scheduler, load_status="scheduled", visit_status=visit_status,
)
# check no visit date has been found
assert len(sors) == 1
@@ -489,12 +480,14 @@
@pytest.mark.django_db
@pytest.mark.parametrize("visit_status", ["not_found", "failed",])
-def test_get_save_origin_requests_no_failed_status_override(mocker, visit_status):
+def test_get_save_origin_requests_no_failed_status_override(
+ mocker, swh_scheduler, visit_status
+):
"""Uneventful visits with failed statuses (failed, not found) are marked as failed
"""
sors = _get_save_origin_requests(
- mocker, load_status="uneventful", visit_status=visit_status
+ mocker, swh_scheduler, load_status="uneventful", visit_status=visit_status
)
assert len(sors) == 1
@@ -517,10 +510,12 @@
("uneventful", VISIT_STATUS_PARTIAL),
],
)
-def test_get_visit_info_for_save_request_succeeded(mocker, load_status, visit_status):
+def test_get_visit_info_for_save_request_succeeded(
+ mocker, swh_scheduler, load_status, visit_status
+):
"""Nominal scenario, below 30 days, returns something"""
sors = _get_save_origin_requests(
- mocker, load_status=load_status, visit_status=visit_status
+ mocker, swh_scheduler, load_status=load_status, visit_status=visit_status
)
assert len(sors) == 1
@@ -535,12 +530,14 @@
@pytest.mark.django_db
@pytest.mark.parametrize("load_status", ["eventful", "uneventful",])
-def test_get_visit_info_incomplete_visit_still_successful(mocker, load_status):
+def test_get_visit_info_incomplete_visit_still_successful(
+ mocker, swh_scheduler, load_status
+):
"""Incomplete visit information, yet the task is updated partially
"""
sors = _get_save_origin_requests(
- mocker, load_status=load_status, visit_status=None,
+ mocker, swh_scheduler, load_status=load_status, visit_status=None,
)
assert len(sors) == 1
@@ -559,7 +556,9 @@
@pytest.mark.django_db
-def test_refresh_in_progress_save_request_statuses(mocker, api_client, archive_data):
+def test_refresh_in_progress_save_request_statuses(
+ mocker, swh_scheduler, api_client, archive_data
+):
"""Refresh a pending save origins requests and update if the status changes
"""
date_now = datetime.now(tz=timezone.utc)
@@ -578,8 +577,10 @@
)
# mock scheduler and archives
- _mock_scheduler(
- mocker, task_status="next_run_scheduled", task_run_status=SAVE_TASK_SCHEDULED
+ _fill_scheduler_db(
+ swh_scheduler,
+ task_status="next_run_scheduled",
+ task_run_status=SAVE_TASK_SCHEDULED,
)
mock_archive = mocker.patch("swh.web.common.origin_save.archive")
mock_archive.lookup_origin.return_value = {"url": _origin_url}
@@ -602,8 +603,8 @@
mock_get_origin_visits.return_value = [visit_info]
# make the scheduler return a running event
- _mock_scheduler(
- mocker,
+ _fill_scheduler_db(
+ swh_scheduler,
task_status="next_run_scheduled",
task_run_status="started",
visit_started_date=visit_started_date,
@@ -625,8 +626,8 @@
# make the visit status completed
# make the scheduler return a running event
- _mock_scheduler(
- mocker,
+ _fill_scheduler_db(
+ swh_scheduler,
task_status="completed",
task_run_status="eventful",
visit_started_date=visit_started_date,
@@ -658,7 +659,7 @@
@pytest.mark.django_db
-def test_refresh_save_request_statuses(mocker, api_client, archive_data):
+def test_refresh_save_request_statuses(mocker, swh_scheduler, api_client, archive_data):
"""Refresh filters save origins requests and update if changes
"""
@@ -676,8 +677,10 @@
)
# mock scheduler and archives
- _mock_scheduler(
- mocker, task_status="next_run_scheduled", task_run_status=SAVE_TASK_SCHEDULED
+ _fill_scheduler_db(
+ swh_scheduler,
+ task_status="next_run_scheduled",
+ task_run_status=SAVE_TASK_SCHEDULED,
)
mock_archive = mocker.patch("swh.web.common.origin_save.archive")
mock_archive.lookup_origin.return_value = {"url": _origin_url}
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py
--- a/swh/web/tests/conftest.py
+++ b/swh/web/tests/conftest.py
@@ -3,6 +3,7 @@
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from datetime import timedelta
import json
import os
import shutil
@@ -18,10 +19,12 @@
from rest_framework.test import APIClient, APIRequestFactory
from swh.model.hashutil import ALGORITHMS, hash_to_bytes
+from swh.scheduler.tests.common import TASK_TYPES
from swh.storage.algos.origin import origin_get_latest_visit_status
from swh.storage.algos.snapshot import snapshot_get_all_branches, snapshot_get_latest
from swh.web.auth.utils import OIDC_SWH_WEB_CLIENT_ID
from swh.web.common import converters
+from swh.web.common.origin_save import get_scheduler_load_task_types
from swh.web.common.typing import OriginVisitInfo
from swh.web.config import get_config
from swh.web.tests.data import get_tests_data, override_storages
@@ -429,3 +432,51 @@
item.ihook.pytest_runtest_teardown(item=item, nextitem=nextitem)
return inner
+
+
+@pytest.fixture
+def swh_scheduler(swh_scheduler):
+ config = get_config()
+ scheduler = config["scheduler"]
+ config["scheduler"] = swh_scheduler
+ # create load-git and load-hg task types
+ for task_type in TASK_TYPES.values():
+ swh_scheduler.create_task_type(task_type)
+ # create load-svn task type
+ swh_scheduler.create_task_type(
+ {
+ "type": "load-svn",
+ "description": "Update a subversion repository",
+ "backend_name": "swh.loader.svn.tasks.DumpMountAndLoadSvnRepository",
+ "default_interval": timedelta(days=64),
+ "min_interval": timedelta(hours=12),
+ "max_interval": timedelta(days=64),
+ "backoff_factor": 2,
+ "max_queue_length": None,
+ "num_retries": 7,
+ "retry_delay": timedelta(hours=2),
+ }
+ )
+
+ # add method to add load-archive-files task type during tests
+ def add_load_archive_task_type():
+ swh_scheduler.create_task_type(
+ {
+ "type": "load-archive-files",
+ "description": "Load tarballs",
+ "backend_name": "swh.loader.package.archive.tasks.LoadArchive",
+ "default_interval": timedelta(days=64),
+ "min_interval": timedelta(hours=12),
+ "max_interval": timedelta(days=64),
+ "backoff_factor": 2,
+ "max_queue_length": None,
+ "num_retries": 7,
+ "retry_delay": timedelta(hours=2),
+ }
+ )
+
+ swh_scheduler.add_load_archive_task_type = add_load_archive_task_type
+
+ yield swh_scheduler
+ config["scheduler"] = scheduler
+ get_scheduler_load_task_types.cache_clear()
diff --git a/swh/web/tests/misc/test_metrics.py b/swh/web/tests/misc/test_metrics.py
--- a/swh/web/tests/misc/test_metrics.py
+++ b/swh/web/tests/misc/test_metrics.py
@@ -34,7 +34,7 @@
@pytest.mark.django_db
-def test_origin_save_metrics(client):
+def test_origin_save_metrics(client, swh_scheduler):
visit_types = get_savable_visit_types()
request_statuses = (
SAVE_REQUEST_ACCEPTED,
diff --git a/swh/web/tests/misc/test_origin_save.py b/swh/web/tests/misc/test_origin_save.py
--- a/swh/web/tests/misc/test_origin_save.py
+++ b/swh/web/tests/misc/test_origin_save.py
@@ -9,7 +9,6 @@
import pytest
from swh.auth.django.utils import oidc_user_from_profile
-from swh.web.auth.utils import SWH_AMBASSADOR_PERMISSION
from swh.web.common.models import SaveOriginRequest
from swh.web.common.origin_save import SAVE_REQUEST_ACCEPTED, SAVE_TASK_SUCCEEDED
from swh.web.common.utils import reverse
@@ -27,32 +26,6 @@
assert resp["location"] == redirect_url
-def test_save_types_list_default(client):
- """Unprivileged listing should display default list of visit types.
-
- """
- url = reverse("origin-save-types-list")
- resp = check_http_get_response(client, url, status_code=200)
-
- actual_response = resp.json()
- assert set(actual_response) == set(VISIT_TYPES)
-
-
-@pytest.mark.django_db
-def test_save_types_list_privileged(client, keycloak_oidc):
- """Privileged listing should display all visit types.
-
- """
- keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION]
- client.login(code="", code_verifier="", redirect_uri="")
-
- url = reverse("origin-save-types-list")
- resp = check_http_get_response(client, url, status_code=200)
-
- actual_response = resp.json()
- assert set(actual_response) == set(PRIVILEGED_VISIT_TYPES)
-
-
@pytest.mark.django_db
def test_save_origin_requests_list(client, mocker):
nb_origins_per_type = 10

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 1:15 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224148

Event Timeline