diff --git a/Makefile.local b/Makefile.local index d6998d37..988d89e5 100644 --- a/Makefile.local +++ b/Makefile.local @@ -1,125 +1,127 @@ TEST_DIRS := ./swh/web/tests TESTFLAGS = --hypothesis-profile=swh-web-fast TESTFULL_FLAGS = --hypothesis-profile=swh-web YARN ?= yarn SETTINGS_TEST ?= swh.web.settings.tests SETTINGS_DEV ?= swh.web.settings.development SETTINGS_PROD = swh.web.settings.production yarn-install: package.json $(YARN) install --frozen-lockfile .PHONY: build-webpack-dev build-webpack-dev: yarn-install $(YARN) build-dev .PHONY: build-webpack-test build-webpack-test: yarn-install $(YARN) build-test .PHONY: build-webpack-dev-no-verbose build-webpack-dev-no-verbose: yarn-install $(YARN) build-dev >/dev/null .PHONY: build-webpack-prod build-webpack-prod: yarn-install $(YARN) build .PHONY: run-migrations-dev run-migrations-dev: + python3 swh/web/manage.py rename_app --settings=$(SETTINGS_DEV) swh_web_common swh_web_save_code_now python3 swh/web/manage.py migrate --settings=$(SETTINGS_DEV) -v0 .PHONY: run-migrations-prod run-migrations-prod: + django-admin rename_app --settings=$(SETTINGS_PROD) swh_web_common swh_web_save_code_now django-admin migrate --settings=$(SETTINGS_PROD) -v0 .PHONY: run-migrations-test run-migrations-test: rm -f swh-web-test.sqlite3 django-admin migrate --settings=$(SETTINGS_TEST) -v0 add-users-test: run-migrations-test cat swh/web/tests/create_test_admin.py | django-admin shell --settings=$(SETTINGS_TEST) cat swh/web/tests/create_test_users.py | django-admin shell --settings=$(SETTINGS_TEST) add-users-dev: run-migrations-dev cat swh/web/tests/create_test_admin.py | django-admin shell --settings=$(SETTINGS_DEV) cat swh/web/tests/create_test_users.py | django-admin shell --settings=$(SETTINGS_DEV) add-users-prod: run-migrations-prod cat swh/web/tests/create_test_admin.py | django-admin shell --settings=$(SETTINGS_PROD) cat swh/web/tests/create_test_users.py | django-admin shell --settings=$(SETTINGS_PROD) .PHONY: clear-memcached 
clear-memcached: echo "flush_all" | nc -q 2 localhost 11211 2>/dev/null run-django-webpack-devserver: add-users-dev yarn-install bash -c "trap 'trap - SIGINT SIGTERM ERR EXIT && \ # ensure all child processes will be killed by PGID when exiting \ ps -o pgid= $$$$ | grep -o [0-9]* | xargs pkill -g' SIGINT SIGTERM ERR EXIT; \ $(YARN) start-dev & sleep 10 && cd swh/web && \ python3 manage.py runserver --nostatic --settings=$(SETTINGS_DEV) || exit 1" run-django-webpack-dev: build-webpack-dev add-users-dev python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_DEV) run-django-webpack-prod: build-webpack-prod add-users-prod clear-memcached python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_PROD) run-django-server-dev: add-users-dev python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_DEV) run-django-server-prod: add-users-prod clear-memcached python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_PROD) run-gunicorn-server: add-users-prod clear-memcached DJANGO_SETTINGS_MODULE=$(SETTINGS_PROD) \ gunicorn --bind 127.0.0.1:5004 \ --threads 2 \ --workers 2 'django.core.wsgi:get_wsgi_application()' run-django-webpack-memory-storages: build-webpack-dev add-users-test python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_TEST) test-full: $(TEST) $(TESTFULL_FLAGS) $(TEST_DIRS) .PHONY: test-frontend-cmd test-frontend-cmd: build-webpack-test add-users-test bash -c "trap 'trap - SIGINT SIGTERM ERR EXIT && \ jobs -p | xargs -r kill' SIGINT SIGTERM ERR EXIT; \ python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_TEST) & \ sleep 10 && $(YARN) run cypress run --config numTestsKeptInMemory=0 && \ $(YARN) mochawesome && $(YARN) nyc-report" test-frontend: export CYPRESS_SKIP_SLOW_TESTS=1 test-frontend: test-frontend-cmd test-frontend-full: export CYPRESS_SKIP_SLOW_TESTS=0 test-frontend-full: test-frontend-cmd .PHONY: test-frontend-ui-cmd test-frontend-ui-cmd: add-users-test yarn-install # ensure 
all child processes will be killed when hitting Ctrl-C in terminal # or manually closing the Cypress UI window, killing by PGID seems the only # reliable way to do it in that case bash -c "trap 'trap - SIGINT SIGTERM ERR EXIT && \ ps -o pgid= $$$$ | grep -o [0-9]* | xargs pkill -g' SIGINT SIGTERM ERR EXIT; \ $(YARN) start-dev & \ python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_TEST) & \ sleep 10 && $(YARN) run cypress open" test-frontend-ui: export CYPRESS_SKIP_SLOW_TESTS=1 test-frontend-ui: test-frontend-ui-cmd test-frontend-full-ui: export CYPRESS_SKIP_SLOW_TESTS=0 test-frontend-full-ui: test-frontend-ui-cmd # Override default rule to make sure DJANGO env var is properly set. It # *should* work without any override thanks to the mypy django-stubs plugin, # but it currently doesn't; see # https://github.com/typeddjango/django-stubs/issues/166 check-mypy: DJANGO_SETTINGS_MODULE=$(SETTINGS_DEV) $(MYPY) $(MYPYFLAGS) swh diff --git a/swh/web/admin/urls.py b/swh/web/admin/urls.py index dc8243b1..4ef703a1 100644 --- a/swh/web/admin/urls.py +++ b/swh/web/admin/urls.py @@ -1,28 +1,27 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.contrib.auth.views import LoginView from django.shortcuts import redirect from django.urls import re_path as url from swh.web.admin.adminurls import AdminUrls import swh.web.admin.deposit # noqa -import swh.web.admin.origin_save # noqa from swh.web.config import is_feature_enabled if is_feature_enabled("add_forge_now"): import swh.web.admin.add_forge_now # noqa def _admin_default_view(request): return redirect("admin-origin-save-requests") urlpatterns = [ url(r"^$", _admin_default_view, name="admin"), url(r"^login/$", LoginView.as_view(template_name="login.html"), name="login"), ] urlpatterns += 
AdminUrls.get_url_patterns() diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py index 4124955a..04297017 100644 --- a/swh/web/api/urls.py +++ b/swh/web/api/urls.py @@ -1,23 +1,23 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information + from swh.web.api.apiurls import APIUrls import swh.web.api.views.add_forge_now # noqa import swh.web.api.views.content # noqa import swh.web.api.views.directory # noqa import swh.web.api.views.graph # noqa import swh.web.api.views.identifiers # noqa import swh.web.api.views.metadata # noqa import swh.web.api.views.origin # noqa -import swh.web.api.views.origin_save # noqa import swh.web.api.views.ping # noqa import swh.web.api.views.raw # noqa import swh.web.api.views.release # noqa import swh.web.api.views.revision # noqa import swh.web.api.views.snapshot # noqa import swh.web.api.views.stat # noqa import swh.web.api.views.vault # noqa urlpatterns = APIUrls.get_url_patterns() diff --git a/swh/web/common/__init__.py b/swh/web/common/__init__.py index 80eb6395..e69de29b 100644 --- a/swh/web/common/__init__.py +++ b/swh/web/common/__init__.py @@ -1,6 +0,0 @@ -# Copyright (C) 2018 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU Affero General Public License version 3, or any later version -# See top-level LICENSE file for more information - -default_app_config = "swh.web.common.apps.SwhWebCommonConfig" diff --git a/swh/web/common/swh_templatetags.py b/swh/web/common/swh_templatetags.py index 66deffae..2c54ae54 100644 --- a/swh/web/common/swh_templatetags.py +++ b/swh/web/common/swh_templatetags.py @@ -1,149 +1,149 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at 
the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json import re from django import template from django.utils.safestring import mark_safe from swh.web.common.converters import SWHDjangoJSONEncoder -from swh.web.common.origin_save import get_savable_visit_types from swh.web.common.utils import rst_to_html +from swh.web.save_code_now.origin_save import get_savable_visit_types register = template.Library() @register.filter def docstring_display(docstring): """ Utility function to htmlize reST-formatted documentation in browsable api. """ return rst_to_html(docstring) @register.filter def urlize_links_and_mails(text): """Utility function for decorating api links in browsable api. Args: text: whose content matching links should be transformed into contextual API or Browse html links. Returns The text transformed if any link is found. The text as is otherwise. """ if 'href="' not in text: text = re.sub(r"(http.*)", r'\1', text) return re.sub(r'([^ <>"]+@[^ <>"]+)', r'\1', text) return text @register.filter def urlize_header_links(text): """Utility function for decorating headers links in browsable api. Args text: Text whose content contains Link header value Returns: The text transformed with html link if any link is found. The text as is otherwise. """ ret = re.sub( r'<(http[^<>]+)>; rel="([^,]+)"', r'<\1>; rel="\2"\n', text ).replace("\n,", "\n") return ret[:-1] @register.filter def jsonify(obj): """Utility function for converting a django template variable to JSON in order to use it in script tags. Args obj: Any django template context variable Returns: JSON representation of the variable. 
""" return mark_safe(json.dumps(obj, cls=SWHDjangoJSONEncoder)) @register.filter def sub(value, arg): """Django template filter for subtracting two numbers Args: value (int/float): the value to subtract from arg (int/float): the value to subtract to Returns: int/float: The subtraction result """ return value - arg @register.filter def mul(value, arg): """Django template filter for multiplying two numbers Args: value (int/float): the value to multiply from arg (int/float): the value to multiply with Returns: int/float: The multiplication result """ return value * arg @register.filter def key_value(dict, key): """Django template filter to get a value in a dictionary. Args: dict (dict): a dictionary key (str): the key to lookup value Returns: The requested value in the dictionary """ return dict[key] @register.filter def visit_type_savable(visit_type: str) -> bool: """Django template filter to check if a save request can be created for a given visit type. Args: visit_type: the type of visit Returns: If the visit type is saveable or not """ return visit_type in get_savable_visit_types() @register.filter def split(value, arg): """Django template filter to split a string. 
Args: value (str): the string to split arg (str): the split separator Returns: list: the split string parts """ return value.split(arg) diff --git a/swh/web/config.py b/swh/web/config.py index dc8423e9..111455eb 100644 --- a/swh/web/config.py +++ b/swh/web/config.py @@ -1,241 +1,242 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict from swh.core import config from swh.counters import get_counters from swh.indexer.storage import get_indexer_storage from swh.scheduler import get_scheduler from swh.search import get_search from swh.storage import get_storage from swh.vault import get_vault from swh.web import settings SWH_WEB_SERVER_NAME = "archive.softwareheritage.org" SWH_WEB_INTERNAL_SERVER_NAME = "archive.internal.softwareheritage.org" SWH_WEB_STAGING_SERVER_NAMES = [ "webapp.staging.swh.network", "webapp.internal.staging.swh.network", ] SETTINGS_DIR = os.path.dirname(settings.__file__) DEFAULT_CONFIG = { "allowed_hosts": ("list", []), "storage": ( "dict", { "cls": "remote", "url": "http://127.0.0.1:5002/", "timeout": 10, }, ), "indexer_storage": ( "dict", { "cls": "remote", "url": "http://127.0.0.1:5007/", "timeout": 1, }, ), "counters": ( "dict", { "cls": "remote", "url": "http://127.0.0.1:5011/", "timeout": 1, }, ), "search": ( "dict", { "cls": "remote", "url": "http://127.0.0.1:5010/", "timeout": 10, }, ), "search_config": ( "dict", { "metadata_backend": "swh-indexer-storage", }, # or "swh-search" ), "log_dir": ("string", "/tmp/swh/log"), "debug": ("bool", False), "serve_assets": ("bool", False), "host": ("string", "127.0.0.1"), "port": ("int", 5004), "secret_key": ("string", "development key"), # do not display code highlighting for content > 1MB "content_display_max_size": ("int", 5 * 1024 * 1024), 
"snapshot_content_max_size": ("int", 1000), "throttling": ( "dict", { "cache_uri": None, # production: memcached as cache (127.0.0.1:11211) # development: in-memory cache so None "scopes": { "swh_api": { "limiter_rate": {"default": "120/h"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_api_origin_search": { "limiter_rate": {"default": "10/m"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_vault_cooking": { "limiter_rate": {"default": "120/h", "GET": "60/m"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_save_origin": { "limiter_rate": {"default": "120/h", "POST": "10/h"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_api_origin_visit_latest": { "limiter_rate": {"default": "700/m"}, "exempted_networks": ["127.0.0.0/8"], }, }, }, ), "vault": ( "dict", { "cls": "remote", "args": { "url": "http://127.0.0.1:5005/", }, }, ), "scheduler": ("dict", {"cls": "remote", "url": "http://127.0.0.1:5008/"}), "development_db": ("string", os.path.join(SETTINGS_DIR, "db.sqlite3")), "test_db": ("dict", {"name": "swh-web-test"}), "production_db": ("dict", {"name": "swh-web"}), "deposit": ( "dict", { "private_api_url": "https://deposit.softwareheritage.org/1/private/", "private_api_user": "swhworker", "private_api_password": "some-password", }, ), "e2e_tests_mode": ("bool", False), "es_workers_index_url": ("string", ""), "history_counters_url": ( "string", ( "http://counters1.internal.softwareheritage.org:5011" "/counters_history/history.json" ), ), "client_config": ("dict", {}), "keycloak": ("dict", {"server_url": "", "realm_name": ""}), "graph": ( "dict", { "server_url": "http://graph.internal.softwareheritage.org:5009/graph/", "max_edges": {"staff": 0, "user": 100000, "anonymous": 1000}, }, ), "status": ( "dict", { "server_url": "https://status.softwareheritage.org/", "json_path": "1.0/status/578e5eddcdc0cc7951000520", }, ), "counters_backend": ("string", "swh-storage"), # or "swh-counters" "staging_server_names": ("list", SWH_WEB_STAGING_SERVER_NAMES), "instance_name": ("str", 
"archive-test.softwareheritage.org"), "give": ("dict", {"public_key": "", "token": ""}), "features": ("dict", {"add_forge_now": True}), "add_forge_now": ("dict", {"email_address": "add-forge-now@example.com"}), "swh_extra_django_apps": ( "list", [ "swh.web.inbound_email", "swh.web.add_forge_now", "swh.web.mailmap", + "swh.web.save_code_now", ], ), } swhweb_config: Dict[str, Any] = {} def get_config(config_file="web/web"): """Read the configuration file `config_file`. If an environment variable SWH_CONFIG_FILENAME is defined, this takes precedence over the config_file parameter. In any case, update the app with parameters (secret_key, conf) and return the parsed configuration as a dict. If no configuration file is provided, return a default configuration. """ if not swhweb_config: config_filename = os.environ.get("SWH_CONFIG_FILENAME") if config_filename: config_file = config_filename cfg = config.load_named_config(config_file, DEFAULT_CONFIG) swhweb_config.update(cfg) config.prepare_folders(swhweb_config, "log_dir") if swhweb_config.get("search"): swhweb_config["search"] = get_search(**swhweb_config["search"]) else: swhweb_config["search"] = None swhweb_config["storage"] = get_storage(**swhweb_config["storage"]) swhweb_config["vault"] = get_vault(**swhweb_config["vault"]) swhweb_config["indexer_storage"] = get_indexer_storage( **swhweb_config["indexer_storage"] ) swhweb_config["scheduler"] = get_scheduler(**swhweb_config["scheduler"]) swhweb_config["counters"] = get_counters(**swhweb_config["counters"]) return swhweb_config def search(): """Return the current application's search.""" return get_config()["search"] def storage(): """Return the current application's storage.""" return get_config()["storage"] def vault(): """Return the current application's vault.""" return get_config()["vault"] def indexer_storage(): """Return the current application's indexer storage.""" return get_config()["indexer_storage"] def scheduler(): """Return the current application's 
scheduler.""" return get_config()["scheduler"] def counters(): """Return the current application's counters.""" return get_config()["counters"] def is_feature_enabled(feature_name: str) -> bool: """Determine whether a feature is enabled or not. If feature_name is not found at all, it's considered disabled. """ return get_config()["features"].get(feature_name, False) diff --git a/swh/web/misc/metrics.py b/swh/web/misc/metrics.py index 8ee7d39d..95764aad 100644 --- a/swh/web/misc/metrics.py +++ b/swh/web/misc/metrics.py @@ -1,21 +1,21 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from prometheus_client.exposition import CONTENT_TYPE_LATEST, generate_latest from django.http import HttpResponse -from swh.web.common.origin_save import compute_save_requests_metrics from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY +from swh.web.save_code_now.origin_save import compute_save_requests_metrics def prometheus_metrics(request): compute_save_requests_metrics() return HttpResponse( content=generate_latest(registry=SWH_WEB_METRICS_REGISTRY), content_type=CONTENT_TYPE_LATEST, ) diff --git a/swh/web/misc/urls.py b/swh/web/misc/urls.py index c9b673f8..683258c2 100644 --- a/swh/web/misc/urls.py +++ b/swh/web/misc/urls.py @@ -1,121 +1,120 @@ # Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json import requests from django.conf.urls import include from django.contrib.staticfiles import finders from django.http import JsonResponse from django.shortcuts import render from django.urls import re_path as url from django.views.decorators.clickjacking import 
xframe_options_exempt from swh.web.common import archive from swh.web.common.exc import sentry_capture_exception from swh.web.config import get_config from swh.web.misc.metrics import prometheus_metrics def _jslicenses(request): jslicenses_file = finders.find("jssources/jslicenses.json") jslicenses_data = json.load(open(jslicenses_file)) jslicenses_data = sorted( jslicenses_data.items(), key=lambda item: item[0].split("/")[-1] ) return render(request, "misc/jslicenses.html", {"jslicenses_data": jslicenses_data}) def _stat_counters(request): stat_counters = archive.stat_counters() url = get_config()["history_counters_url"] stat_counters_history = {} try: response = requests.get(url, timeout=5) stat_counters_history = json.loads(response.text) except Exception as exc: sentry_capture_exception(exc) counters = { "stat_counters": stat_counters, "stat_counters_history": stat_counters_history, } return JsonResponse(counters) @xframe_options_exempt def hiring_banner(request): lang = request.GET.get("lang") return render( request, "misc/hiring-banner-iframe.html", { "lang": lang if lang else "en", }, ) urlpatterns = [ url(r"^", include("swh.web.misc.coverage")), url(r"^jslicenses/$", _jslicenses, name="jslicenses"), - url(r"^", include("swh.web.misc.origin_save")), url(r"^stat_counters/$", _stat_counters, name="stat-counters"), url(r"^", include("swh.web.misc.badges")), url(r"^metrics/prometheus/$", prometheus_metrics, name="metrics-prometheus"), url(r"^", include("swh.web.misc.iframe")), url(r"^", include("swh.web.misc.fundraising")), url(r"^hiring/banner/$", hiring_banner, name="swh-hiring-banner"), ] # when running end to end tests through cypress, declare some extra # endpoints to provide input data for some of those tests if get_config()["e2e_tests_mode"]: from swh.web.tests.views import ( get_content_code_data_all_exts, get_content_code_data_all_filenames, get_content_code_data_by_ext, get_content_code_data_by_filename, get_content_other_data_by_ext, ) 
urlpatterns.append( url( r"^tests/data/content/code/extension/(?P.+)/$", get_content_code_data_by_ext, name="tests-content-code-extension", ) ) urlpatterns.append( url( r"^tests/data/content/other/extension/(?P.+)/$", get_content_other_data_by_ext, name="tests-content-other-extension", ) ) urlpatterns.append( url( r"^tests/data/content/code/extensions/$", get_content_code_data_all_exts, name="tests-content-code-extensions", ) ) urlpatterns.append( url( r"^tests/data/content/code/filename/(?P.+)/$", get_content_code_data_by_filename, name="tests-content-code-filename", ) ) urlpatterns.append( url( r"^tests/data/content/code/filenames/$", get_content_code_data_all_filenames, name="tests-content-code-filenames", ) ) diff --git a/swh/web/common/__init__.py b/swh/web/save_code_now/__init__.py similarity index 61% copy from swh/web/common/__init__.py copy to swh/web/save_code_now/__init__.py index 80eb6395..0bff01a9 100644 --- a/swh/web/common/__init__.py +++ b/swh/web/save_code_now/__init__.py @@ -1,6 +1,6 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -default_app_config = "swh.web.common.apps.SwhWebCommonConfig" +default_app_config = "swh.web.save_code_now.apps.SaveCodeNowConfig" diff --git a/swh/web/admin/origin_save.py b/swh/web/save_code_now/admin_views.py similarity index 70% rename from swh/web/admin/origin_save.py rename to swh/web/save_code_now/admin_views.py index 3b722e5f..13296335 100644 --- a/swh/web/admin/origin_save.py +++ b/swh/web/save_code_now/admin_views.py @@ -1,220 +1,181 @@ -# Copyright (C) 2018-2019 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero 
General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.conf import settings from django.contrib.admin.views.decorators import staff_member_required from django.core.exceptions import ObjectDoesNotExist from django.core.paginator import Paginator from django.http import HttpResponse, JsonResponse from django.shortcuts import render from django.views.decorators.http import require_POST -from swh.web.admin.adminurls import admin_route -from swh.web.common.models import ( +from swh.web.save_code_now.models import ( SaveAuthorizedOrigin, SaveOriginRequest, SaveUnauthorizedOrigin, ) -from swh.web.common.origin_save import ( +from swh.web.save_code_now.origin_save import ( SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, create_save_origin_request, ) -@admin_route(r"origin/save/requests/", view_name="admin-origin-save-requests") @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_requests(request): - return render(request, "admin/origin-save/requests.html") +def admin_origin_save_requests(request): + return render(request, "admin/origin-save-requests.html") -@admin_route(r"origin/save/filters/", view_name="admin-origin-save-filters") @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_filters(request): - return render(request, "admin/origin-save/filters.html") +def admin_origin_save_filters(request): + return render(request, "admin/origin-save-filters.html") def _datatables_origin_urls_response(request, urls_query_set): search_value = request.GET["search[value]"] if search_value: urls_query_set = urls_query_set.filter(url__icontains=search_value) column_order = request.GET["order[0][column]"] field_order = request.GET["columns[%s][name]" % column_order] order_dir = request.GET["order[0][dir]"] if order_dir == "desc": field_order = "-" + field_order urls_query_set = urls_query_set.order_by(field_order) table_data = {} 
table_data["draw"] = int(request.GET["draw"]) table_data["recordsTotal"] = urls_query_set.count() table_data["recordsFiltered"] = urls_query_set.count() length = int(request.GET["length"]) page = int(request.GET["start"]) / length + 1 paginator = Paginator(urls_query_set, length) urls_query_set = paginator.page(page).object_list table_data["data"] = [{"url": u.url} for u in urls_query_set] return JsonResponse(table_data) -@admin_route( - r"origin/save/authorized_urls/list/", - view_name="admin-origin-save-authorized-urls-list", -) @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_authorized_urls_list(request): +def admin_origin_save_authorized_urls_list(request): authorized_urls = SaveAuthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, authorized_urls) -@admin_route( - r"origin/save/authorized_urls/add/(?P.+)/", - view_name="admin-origin-save-add-authorized-url", -) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_add_authorized_url(request, origin_url): +def admin_origin_save_add_authorized_url(request, origin_url): try: SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: # add the new authorized url SaveAuthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = SaveOriginRequest.objects.filter( origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING ) # create origin save tasks for previously pending requests for psr in pending_save_requests: create_save_origin_request(psr.visit_type, psr.origin_url) status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) -@admin_route( - r"origin/save/authorized_urls/remove/(?P.+)/", - view_name="admin-origin-save-remove-authorized-url", -) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def 
_admin_origin_save_remove_authorized_url(request, origin_url): +def admin_origin_save_remove_authorized_url(request, origin_url): try: entry = SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) -@admin_route( - r"origin/save/unauthorized_urls/list/", - view_name="admin-origin-save-unauthorized-urls-list", -) @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_unauthorized_urls_list(request): +def admin_origin_save_unauthorized_urls_list(request): unauthorized_urls = SaveUnauthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, unauthorized_urls) -@admin_route( - r"origin/save/unauthorized_urls/add/(?P.+)/", - view_name="admin-origin-save-add-unauthorized-url", -) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_add_unauthorized_url(request, origin_url): +def admin_origin_save_add_unauthorized_url(request, origin_url): try: SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = SaveOriginRequest.objects.filter( origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING ) # mark pending requests as rejected for psr in pending_save_requests: psr.status = SAVE_REQUEST_REJECTED psr.save() status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) -@admin_route( - r"origin/save/unauthorized_urls/remove/(?P.+)/", - view_name="admin-origin-save-remove-unauthorized-url", -) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_remove_unauthorized_url(request, origin_url): +def admin_origin_save_remove_unauthorized_url(request, origin_url): try: entry = 
SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) -@admin_route( - r"origin/save/request/accept/(?P.+)/url/(?P.+)/", - view_name="admin-origin-save-request-accept", -) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_request_accept(request, visit_type, origin_url): +def admin_origin_save_request_accept(request, visit_type, origin_url): try: SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveAuthorizedOrigin.objects.create(url=origin_url) create_save_origin_request(visit_type, origin_url) return HttpResponse(status=200) -@admin_route( - r"origin/save/request/reject/(?P.+)/url/(?P.+)/", - view_name="admin-origin-save-request-reject", -) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_request_reject(request, visit_type, origin_url): +def admin_origin_save_request_reject(request, visit_type, origin_url): try: sor = SaveOriginRequest.objects.get( visit_type=visit_type, origin_url=origin_url, status=SAVE_REQUEST_PENDING ) except ObjectDoesNotExist: status_code = 404 else: status_code = 200 sor.status = SAVE_REQUEST_REJECTED sor.note = json.loads(request.body).get("note") sor.save() return HttpResponse(status=status_code) -@admin_route( - r"origin/save/request/remove/(?P.+)/", - view_name="admin-origin-save-request-remove", -) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _admin_origin_save_request_remove(request, sor_id): +def admin_origin_save_request_remove(request, sor_id): try: entry = SaveOriginRequest.objects.get(id=sor_id) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) diff --git a/swh/web/api/views/origin_save.py b/swh/web/save_code_now/api_views.py similarity index 99% rename 
from swh/web/api/views/origin_save.py rename to swh/web/save_code_now/api_views.py index 1c42e5ea..68d977aa 100644 --- a/swh/web/api/views/origin_save.py +++ b/swh/web/save_code_now/api_views.py @@ -1,127 +1,127 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Optional, cast from rest_framework.request import Request from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.auth.utils import ( API_SAVE_ORIGIN_PERMISSION, SWH_AMBASSADOR_PERMISSION, privileged_user, ) -from swh.web.common.origin_save import ( +from swh.web.save_code_now.origin_save import ( create_save_origin_request, get_savable_visit_types, get_save_origin_requests, ) def _savable_visit_types() -> str: docstring = "" if os.environ.get("DJANGO_SETTINGS_MODULE") != "swh.web.settings.tests": visit_types = sorted(get_savable_visit_types()) docstring = "" for visit_type in visit_types[:-1]: docstring += f"**{visit_type}**, " docstring += f"and **{visit_types[-1]}**" return docstring @api_route( r"/origin/save/(?P.+)/url/(?P.+)/", "api-1-save-origin", methods=["GET", "POST"], throttle_scope="swh_save_origin", never_cache=True, ) @api_doc("/origin/save/") @format_docstring(visit_types=_savable_visit_types()) def api_save_origin(request: Request, visit_type: str, origin_url: str): """ .. http:get:: /api/1/origin/save/(visit_type)/url/(origin_url)/ .. http:post:: /api/1/origin/save/(visit_type)/url/(origin_url)/ Request the saving of a software origin into the archive or check the status of previously created save requests. That endpoint enables to create a saving task for a software origin through a POST request. 
Depending of the provided origin url, the save request can either be: * immediately **accepted**, for well known code hosting providers like for instance GitHub or GitLab * **rejected**, in case the url is blacklisted by Software Heritage * **put in pending state** until a manual check is done in order to determine if it can be loaded or not Once a saving request has been accepted, its associated saving task status can then be checked through a GET request on the same url. Returned status can either be: * **not created**: no saving task has been created * **not yet scheduled**: saving task has been created but its execution has not yet been scheduled * **scheduled**: the task execution has been scheduled * **succeeded**: the saving task has been successfully executed * **failed**: the saving task has been executed but it failed When issuing a POST request an object will be returned while a GET request will return an array of objects (as multiple save requests might have been submitted for the same origin). :param string visit_type: the type of visit to perform (currently the supported types are {visit_types}) :param string origin_url: the url of the origin to save {common_headers} :>json string origin_url: the url of the origin to save :>json string visit_type: the type of visit to perform :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either **accepted**, **rejected** or **pending** :>json string save_task_status: the status of the origin saving task, either **not created**, **not yet scheduled**, **scheduled**, **succeeded** or **failed** :>json string visit_date: the date (in iso format) of the visit if a visit occurred, null otherwise. :>json string visit_status: the status of the visit, either **full**, **partial**, **not_found** or **failed** if a visit occurred, null otherwise. 
:>json string note: optional note giving details about the save request, for instance why it has been rejected :statuscode 200: no error :statuscode 400: an invalid visit type or origin url has been provided :statuscode 403: the provided origin url is blacklisted :statuscode 404: no save requests have been found for a given origin """ data = request.data or {} if request.method == "POST": sor = create_save_origin_request( visit_type, origin_url, privileged_user( request, permissions=[SWH_AMBASSADOR_PERMISSION, API_SAVE_ORIGIN_PERMISSION], ), user_id=cast(Optional[int], request.user.id), **data, ) del sor["id"] return sor else: sors = get_save_origin_requests(visit_type, origin_url) for sor in sors: del sor["id"] return sors diff --git a/swh/web/common/apps.py b/swh/web/save_code_now/apps.py similarity index 59% rename from swh/web/common/apps.py rename to swh/web/save_code_now/apps.py index f1e7582e..ad2c49e9 100644 --- a/swh/web/common/apps.py +++ b/swh/web/save_code_now/apps.py @@ -1,11 +1,11 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.apps import AppConfig -class SwhWebCommonConfig(AppConfig): - name = "swh.web.common" - label = "swh_web_common" +class SaveCodeNowConfig(AppConfig): + name = "swh.web.save_code_now" + label = "swh_web_save_code_now" diff --git a/swh/web/common/management/__init__.py b/swh/web/save_code_now/management/__init__.py similarity index 100% copy from swh/web/common/management/__init__.py copy to swh/web/save_code_now/management/__init__.py diff --git a/swh/web/common/management/commands/__init__.py b/swh/web/save_code_now/management/commands/__init__.py similarity index 100% rename from swh/web/common/management/commands/__init__.py rename to 
swh/web/save_code_now/management/commands/__init__.py diff --git a/swh/web/common/management/commands/refresh_savecodenow_statuses.py b/swh/web/save_code_now/management/commands/refresh_savecodenow_statuses.py similarity index 93% rename from swh/web/common/management/commands/refresh_savecodenow_statuses.py rename to swh/web/save_code_now/management/commands/refresh_savecodenow_statuses.py index e697d92d..f6b81897 100644 --- a/swh/web/common/management/commands/refresh_savecodenow_statuses.py +++ b/swh/web/save_code_now/management/commands/refresh_savecodenow_statuses.py @@ -1,63 +1,63 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Set from django.core.management.base import BaseCommand from swh.scheduler.model import ListedOrigin -from swh.web.common.models import VISIT_STATUS_FULL, VISIT_STATUS_PARTIAL -from swh.web.common.origin_save import refresh_save_origin_request_statuses from swh.web.config import get_config from swh.web.config import scheduler as get_scheduler +from swh.web.save_code_now.models import VISIT_STATUS_FULL, VISIT_STATUS_PARTIAL +from swh.web.save_code_now.origin_save import refresh_save_origin_request_statuses class Command(BaseCommand): help = "Refresh save code now origin request statuses periodically" def handle(self, *args, **options): """Refresh origin save code now requests. For the origin visit types, svn, git, hg, this also installs the origins as recurring origins to visit. 
""" refreshed_statuses = refresh_save_origin_request_statuses() scheduler = get_scheduler() # then schedule the origins with meaningful status and type to be ingested # regularly lister = scheduler.get_or_create_lister( name="save-code-now", instance_name=get_config()["instance_name"] ) origins: Set[str, str] = set() listed_origins = [] for status in refreshed_statuses: visit_type = status["visit_type"] # only deal with git, svn, hg visit types if visit_type == "archives": continue # only keep satisfying visit statuses if status["visit_status"] not in (VISIT_STATUS_PARTIAL, VISIT_STATUS_FULL): continue origin = status["origin_url"] # drop duplicates within the same batch if (visit_type, origin) in origins: continue origins.add((visit_type, origin)) listed_origins.append( ListedOrigin(lister_id=lister.id, visit_type=visit_type, url=origin) ) if listed_origins: scheduler.record_listed_origins(listed_origins) if len(refreshed_statuses) > 0: msg = f"Successfully updated {len(refreshed_statuses)} save request(s)." else: msg = "Nothing to do." 
self.stdout.write(self.style.SUCCESS(msg)) diff --git a/swh/web/common/migrations/0001_initial.py b/swh/web/save_code_now/migrations/0001_initial.py similarity index 96% rename from swh/web/common/migrations/0001_initial.py rename to swh/web/save_code_now/migrations/0001_initial.py index 30903eee..3963ce61 100644 --- a/swh/web/common/migrations/0001_initial.py +++ b/swh/web/save_code_now/migrations/0001_initial.py @@ -1,96 +1,98 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import unicode_literals from django.db import migrations, models _authorized_origins = [ "https://github.com/", "https://gitlab.com/", "https://bitbucket.org/", "https://git.code.sf.net/", "http://git.code.sf.net/", "https://hg.code.sf.net/", "http://hg.code.sf.net/", "https://svn.code.sf.net/", "http://svn.code.sf.net/", ] def _populate_save_authorized_origins(apps, schema_editor): - SaveAuthorizedOrigin = apps.get_model("swh_web_common", "SaveAuthorizedOrigin") + SaveAuthorizedOrigin = apps.get_model( + "swh_web_save_code_now", "SaveAuthorizedOrigin" + ) for origin_url in _authorized_origins: SaveAuthorizedOrigin.objects.create(url=origin_url) class Migration(migrations.Migration): initial = True operations = [ migrations.CreateModel( name="SaveAuthorizedOrigin", fields=[ ( "id", models.AutoField( auto_created=True, primary_key=True, serialize=False, verbose_name="ID", ), ), ("url", models.CharField(max_length=200)), ], options={ "db_table": "save_authorized_origin", }, ), migrations.CreateModel( name="SaveOriginRequest", fields=[ ("id", models.BigAutoField(primary_key=True, serialize=False)), ("request_date", models.DateTimeField(auto_now_add=True)), ("origin_type", models.CharField(max_length=200)), ("origin_url", models.CharField(max_length=200)), ( "status", 
models.TextField( choices=[ ("accepted", "accepted"), ("rejected", "rejected"), ("pending", "pending"), ], default="pending", ), ), ("loading_task_id", models.IntegerField(default=-1)), ], options={ "db_table": "save_origin_request", "ordering": ["-id"], }, ), migrations.CreateModel( name="SaveUnauthorizedOrigin", fields=[ ( "id", models.AutoField( auto_created=True, primary_key=True, serialize=False, verbose_name="ID", ), ), ("url", models.CharField(max_length=200)), ], options={ "db_table": "save_unauthorized_origin", }, ), migrations.RunPython(_populate_save_authorized_origins), ] diff --git a/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py b/swh/web/save_code_now/migrations/0002_saveoriginrequest_visit_date.py similarity index 92% rename from swh/web/common/migrations/0002_saveoriginrequest_visit_date.py rename to swh/web/save_code_now/migrations/0002_saveoriginrequest_visit_date.py index b2792f2e..e203ba7b 100644 --- a/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py +++ b/swh/web/save_code_now/migrations/0002_saveoriginrequest_visit_date.py @@ -1,23 +1,23 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0001_initial"), + ("swh_web_save_code_now", "0001_initial"), ] operations = [ migrations.AddField( model_name="saveoriginrequest", name="visit_date", field=models.DateTimeField(null=True), ), ] diff --git a/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py b/swh/web/save_code_now/migrations/0003_saveoriginrequest_loading_task_status.py similarity index 91% rename from swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py rename to 
swh/web/save_code_now/migrations/0003_saveoriginrequest_loading_task_status.py index c539b675..98afdbd0 100644 --- a/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py +++ b/swh/web/save_code_now/migrations/0003_saveoriginrequest_loading_task_status.py @@ -1,52 +1,52 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import unicode_literals from django.db import migrations, models from swh.web.config import scheduler def _remove_archived_tasks_with_no_saved_status(apps, schema_editor): """ Scheduler tasks are archived on a regular basis so their completion state could not be known anymore as previous to this migration, the loading task status was not stored in the database. So remove the rows associated to already archived tasks as the loading status can not be retrieved anymore. 
""" - SaveOriginRequest = apps.get_model("swh_web_common", "SaveOriginRequest") + SaveOriginRequest = apps.get_model("swh_web_save_code_now", "SaveOriginRequest") no_saved_status_tasks = [] for sor in SaveOriginRequest.objects.all(): tasks = scheduler().get_tasks([sor.loading_task_id]) if not tasks: no_saved_status_tasks.append(sor.loading_task_id) SaveOriginRequest.objects.filter(loading_task_id__in=no_saved_status_tasks).delete() class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0002_saveoriginrequest_visit_date"), + ("swh_web_save_code_now", "0002_saveoriginrequest_visit_date"), ] operations = [ migrations.AddField( model_name="saveoriginrequest", name="loading_task_status", field=models.TextField( choices=[ ("not created", "not created"), ("not yet scheduled", "not yet scheduled"), ("scheduled", "scheduled"), ("succeed", "succeed"), ("failed", "failed"), ], default="not created", ), ), migrations.RunPython(_remove_archived_tasks_with_no_saved_status), ] diff --git a/swh/web/common/migrations/0004_auto_20190204_1324.py b/swh/web/save_code_now/migrations/0004_auto_20190204_1324.py similarity index 92% rename from swh/web/common/migrations/0004_auto_20190204_1324.py rename to swh/web/save_code_now/migrations/0004_auto_20190204_1324.py index 2021a315..f77d460d 100644 --- a/swh/web/common/migrations/0004_auto_20190204_1324.py +++ b/swh/web/save_code_now/migrations/0004_auto_20190204_1324.py @@ -1,33 +1,33 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0003_saveoriginrequest_loading_task_status"), + ("swh_web_save_code_now", "0003_saveoriginrequest_loading_task_status"), ] 
operations = [ migrations.AlterField( model_name="saveoriginrequest", name="loading_task_status", field=models.TextField( choices=[ ("not created", "not created"), ("not yet scheduled", "not yet scheduled"), ("scheduled", "scheduled"), ("succeed", "succeed"), ("failed", "failed"), ("running", "running"), ], default="not created", ), ), ] diff --git a/swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py b/swh/web/save_code_now/migrations/0005_remove_duplicated_authorized_origins.py similarity index 85% rename from swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py rename to swh/web/save_code_now/migrations/0005_remove_duplicated_authorized_origins.py index 748c3f53..720f95e3 100644 --- a/swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py +++ b/swh/web/save_code_now/migrations/0005_remove_duplicated_authorized_origins.py @@ -1,25 +1,25 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import unicode_literals from django.db import migrations -from swh.web.common.models import SaveAuthorizedOrigin +from swh.web.save_code_now.models import SaveAuthorizedOrigin def _remove_duplicated_urls_in_authorized_list(apps, schema_editor): sao = SaveAuthorizedOrigin.objects for url in sao.values_list("url", flat=True).distinct(): sao.filter(pk__in=sao.filter(url=url).values_list("id", flat=True)[1:]).delete() class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0004_auto_20190204_1324"), + ("swh_web_save_code_now", "0004_auto_20190204_1324"), ] operations = [migrations.RunPython(_remove_duplicated_urls_in_authorized_list)] diff --git a/swh/web/common/migrations/0006_rename_origin_type.py b/swh/web/save_code_now/migrations/0006_rename_origin_type.py similarity index 87% rename from 
swh/web/common/migrations/0006_rename_origin_type.py rename to swh/web/save_code_now/migrations/0006_rename_origin_type.py index adbf4e6c..0770bf20 100644 --- a/swh/web/common/migrations/0006_rename_origin_type.py +++ b/swh/web/save_code_now/migrations/0006_rename_origin_type.py @@ -1,23 +1,23 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import unicode_literals from django.db import migrations class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0005_remove_duplicated_authorized_origins"), + ("swh_web_save_code_now", "0005_remove_duplicated_authorized_origins"), ] operations = [ migrations.RenameField( model_name="saveoriginrequest", old_name="origin_type", new_name="visit_type", ), ] diff --git a/swh/web/common/migrations/0007_save_request_task_status_fix_typo.py b/swh/web/save_code_now/migrations/0007_save_request_task_status_fix_typo.py similarity index 90% rename from swh/web/common/migrations/0007_save_request_task_status_fix_typo.py rename to swh/web/save_code_now/migrations/0007_save_request_task_status_fix_typo.py index 78f2c792..9f86cf80 100644 --- a/swh/web/common/migrations/0007_save_request_task_status_fix_typo.py +++ b/swh/web/save_code_now/migrations/0007_save_request_task_status_fix_typo.py @@ -1,43 +1,43 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.db import migrations, models def _rename_request_status_from_succeed_to_succeeded(apps, schema_editor): """ Fix a typo in save request status value. 
""" - SaveOriginRequest = apps.get_model("swh_web_common", "SaveOriginRequest") + SaveOriginRequest = apps.get_model("swh_web_save_code_now", "SaveOriginRequest") for sor in SaveOriginRequest.objects.all(): if sor.loading_task_status == "succeed": sor.loading_task_status = "succeeded" sor.save() class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0006_rename_origin_type"), + ("swh_web_save_code_now", "0006_rename_origin_type"), ] operations = [ migrations.AlterField( model_name="saveoriginrequest", name="loading_task_status", field=models.TextField( choices=[ ("not created", "not created"), ("not yet scheduled", "not yet scheduled"), ("scheduled", "scheduled"), ("succeeded", "succeeded"), ("failed", "failed"), ("running", "running"), ], default="not created", ), ), migrations.RunPython(_rename_request_status_from_succeed_to_succeeded), ] diff --git a/swh/web/common/migrations/0008_save-code-now_indexes_20210106_1327.py b/swh/web/save_code_now/migrations/0008_save-code-now_indexes_20210106_1327.py similarity index 91% rename from swh/web/common/migrations/0008_save-code-now_indexes_20210106_1327.py rename to swh/web/save_code_now/migrations/0008_save-code-now_indexes_20210106_1327.py index dd7afbb3..badcd0c1 100644 --- a/swh/web/common/migrations/0008_save-code-now_indexes_20210106_1327.py +++ b/swh/web/save_code_now/migrations/0008_save-code-now_indexes_20210106_1327.py @@ -1,29 +1,29 @@ # Generated by Django 2.2.15 on 2021-01-06 13:27 # Adds indexes to the Save Code Now tables. 
from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0007_save_request_task_status_fix_typo"), + ("swh_web_save_code_now", "0007_save_request_task_status_fix_typo"), ] operations = [ migrations.AddIndex( model_name="saveauthorizedorigin", index=models.Index(fields=["url"], name="save_author_url_3e4e9d_idx"), ), migrations.AddIndex( model_name="saveoriginrequest", index=models.Index( fields=["origin_url", "status"], name="save_origin_origin__b46350_idx" ), ), migrations.AddIndex( model_name="saveunauthorizedorigin", index=models.Index(fields=["url"], name="save_unauth_url_c008fc_idx"), ), ] diff --git a/swh/web/common/migrations/0009_saveoriginrequest_visit_status.py b/swh/web/save_code_now/migrations/0009_saveoriginrequest_visit_status.py similarity index 92% rename from swh/web/common/migrations/0009_saveoriginrequest_visit_status.py rename to swh/web/save_code_now/migrations/0009_saveoriginrequest_visit_status.py index a8adf6c1..476a0ad7 100644 --- a/swh/web/common/migrations/0009_saveoriginrequest_visit_status.py +++ b/swh/web/save_code_now/migrations/0009_saveoriginrequest_visit_status.py @@ -1,32 +1,32 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Generated by Django 2.2.19 on 2021-04-19 16:38 from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0008_save-code-now_indexes_20210106_1327"), + ("swh_web_save_code_now", "0008_save-code-now_indexes_20210106_1327"), ] operations = [ migrations.AddField( model_name="saveoriginrequest", name="visit_status", field=models.TextField( choices=[ ("created", "created"), ("ongoing", "ongoing"), ("full", "full"), ("partial", "partial"), ("not_found", "not_found"), ("failed", "failed"), ], 
null=True, ), ), ] diff --git a/swh/web/common/migrations/0010_saveoriginrequest_user_id.py b/swh/web/save_code_now/migrations/0010_saveoriginrequest_user_id.py similarity index 89% rename from swh/web/common/migrations/0010_saveoriginrequest_user_id.py rename to swh/web/save_code_now/migrations/0010_saveoriginrequest_user_id.py index d2ceceb9..ecd93bbe 100644 --- a/swh/web/common/migrations/0010_saveoriginrequest_user_id.py +++ b/swh/web/save_code_now/migrations/0010_saveoriginrequest_user_id.py @@ -1,22 +1,22 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Generated by Django 2.2.20 on 2021-05-03 14:16 from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0009_saveoriginrequest_visit_status"), + ("swh_web_save_code_now", "0009_saveoriginrequest_visit_status"), ] operations = [ migrations.AddField( model_name="saveoriginrequest", name="user_id", field=models.CharField(max_length=200, null=True), ), ] diff --git a/swh/web/common/migrations/0011_saveoriginrequest_user_ids.py b/swh/web/save_code_now/migrations/0011_saveoriginrequest_user_ids.py similarity index 90% rename from swh/web/common/migrations/0011_saveoriginrequest_user_ids.py rename to swh/web/save_code_now/migrations/0011_saveoriginrequest_user_ids.py index 353c1790..2756cf95 100644 --- a/swh/web/common/migrations/0011_saveoriginrequest_user_ids.py +++ b/swh/web/save_code_now/migrations/0011_saveoriginrequest_user_ids.py @@ -1,25 +1,25 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.db import migrations, models class 
Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0010_saveoriginrequest_user_id"), + ("swh_web_save_code_now", "0010_saveoriginrequest_user_id"), ] operations = [ migrations.RemoveField( model_name="saveoriginrequest", name="user_id", ), migrations.AddField( model_name="saveoriginrequest", name="user_ids", field=models.TextField(null=True), ), ] diff --git a/swh/web/common/migrations/0012_saveoriginrequest_note.py b/swh/web/save_code_now/migrations/0012_saveoriginrequest_note.py similarity index 88% rename from swh/web/common/migrations/0012_saveoriginrequest_note.py rename to swh/web/save_code_now/migrations/0012_saveoriginrequest_note.py index 6df1582f..582c5ed6 100644 --- a/swh/web/common/migrations/0012_saveoriginrequest_note.py +++ b/swh/web/save_code_now/migrations/0012_saveoriginrequest_note.py @@ -1,21 +1,21 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ("swh_web_common", "0011_saveoriginrequest_user_ids"), + ("swh_web_save_code_now", "0011_saveoriginrequest_user_ids"), ] operations = [ migrations.AddField( model_name="saveoriginrequest", name="note", field=models.TextField(null=True), ), ] diff --git a/swh/web/common/migrations/__init__.py b/swh/web/save_code_now/migrations/__init__.py similarity index 100% rename from swh/web/common/migrations/__init__.py rename to swh/web/save_code_now/migrations/__init__.py diff --git a/swh/web/common/models.py b/swh/web/save_code_now/models.py similarity index 95% rename from swh/web/common/models.py rename to swh/web/save_code_now/models.py index fc2738aa..52c0ea09 100644 --- a/swh/web/common/models.py +++ b/swh/web/save_code_now/models.py @@ -1,135 +1,135 @@ -# Copyright (C) 2018-2021 The Software 
Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.db import models from swh.web.common.typing import SaveOriginRequestInfo class SaveAuthorizedOrigin(models.Model): """ Model table holding origin urls authorized to be loaded into the archive. """ url = models.CharField(max_length=200, null=False) class Meta: - app_label = "swh_web_common" + app_label = "swh_web_save_code_now" db_table = "save_authorized_origin" indexes = [models.Index(fields=["url"])] def __str__(self): return self.url class SaveUnauthorizedOrigin(models.Model): """ Model table holding origin urls not authorized to be loaded into the archive. """ url = models.CharField(max_length=200, null=False) class Meta: - app_label = "swh_web_common" + app_label = "swh_web_save_code_now" db_table = "save_unauthorized_origin" indexes = [models.Index(fields=["url"])] def __str__(self): return self.url SAVE_REQUEST_ACCEPTED = "accepted" SAVE_REQUEST_REJECTED = "rejected" SAVE_REQUEST_PENDING = "pending" SAVE_REQUEST_STATUS = [ (SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_ACCEPTED), (SAVE_REQUEST_REJECTED, SAVE_REQUEST_REJECTED), (SAVE_REQUEST_PENDING, SAVE_REQUEST_PENDING), ] SAVE_TASK_NOT_CREATED = "not created" SAVE_TASK_NOT_YET_SCHEDULED = "not yet scheduled" SAVE_TASK_SCHEDULED = "scheduled" SAVE_TASK_SUCCEEDED = "succeeded" SAVE_TASK_FAILED = "failed" SAVE_TASK_RUNNING = "running" SAVE_TASK_STATUS = [ (SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_CREATED), (SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED), (SAVE_TASK_SCHEDULED, SAVE_TASK_SCHEDULED), (SAVE_TASK_SUCCEEDED, SAVE_TASK_SUCCEEDED), (SAVE_TASK_FAILED, SAVE_TASK_FAILED), (SAVE_TASK_RUNNING, SAVE_TASK_RUNNING), ] VISIT_STATUS_CREATED = "created" VISIT_STATUS_ONGOING = "ongoing" VISIT_STATUS_FULL = "full" 
VISIT_STATUS_PARTIAL = "partial" VISIT_STATUS_NOT_FOUND = "not_found" VISIT_STATUS_FAILED = "failed" VISIT_STATUSES = [ (VISIT_STATUS_CREATED, VISIT_STATUS_CREATED), (VISIT_STATUS_ONGOING, VISIT_STATUS_ONGOING), (VISIT_STATUS_FULL, VISIT_STATUS_FULL), (VISIT_STATUS_PARTIAL, VISIT_STATUS_PARTIAL), (VISIT_STATUS_NOT_FOUND, VISIT_STATUS_NOT_FOUND), (VISIT_STATUS_FAILED, VISIT_STATUS_FAILED), ] class SaveOriginRequest(models.Model): """ Model table holding all the save origin requests issued by users. """ id = models.BigAutoField(primary_key=True) request_date = models.DateTimeField(auto_now_add=True) visit_type = models.CharField(max_length=200, null=False) visit_status = models.TextField(choices=VISIT_STATUSES, null=True) origin_url = models.CharField(max_length=200, null=False) status = models.TextField(choices=SAVE_REQUEST_STATUS, default=SAVE_REQUEST_PENDING) loading_task_id = models.IntegerField(default=-1) visit_date = models.DateTimeField(null=True) loading_task_status = models.TextField( choices=SAVE_TASK_STATUS, default=SAVE_TASK_NOT_CREATED ) # store ids of users that submitted the request as string list user_ids = models.TextField(null=True) note = models.TextField(null=True) class Meta: - app_label = "swh_web_common" + app_label = "swh_web_save_code_now" db_table = "save_origin_request" ordering = ["-id"] indexes = [models.Index(fields=["origin_url", "status"])] def to_dict(self) -> SaveOriginRequestInfo: """Map the request save model object to a json serializable dict. Returns: The corresponding SaveOriginRequetsInfo json serializable dict. 
""" visit_date = self.visit_date return SaveOriginRequestInfo( id=self.id, origin_url=self.origin_url, visit_type=self.visit_type, save_request_date=self.request_date.isoformat(), save_request_status=self.status, save_task_status=self.loading_task_status, visit_status=self.visit_status, visit_date=visit_date.isoformat() if visit_date else None, loading_task_id=self.loading_task_id, note=self.note, ) def __str__(self) -> str: return str(self.to_dict()) diff --git a/swh/web/common/origin_save.py b/swh/web/save_code_now/origin_save.py similarity index 99% rename from swh/web/common/origin_save.py rename to swh/web/save_code_now/origin_save.py index da88b144..9fffd110 100644 --- a/swh/web/common/origin_save.py +++ b/swh/web/save_code_now/origin_save.py @@ -1,942 +1,942 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta, timezone from functools import lru_cache from itertools import product import json import logging from typing import Any, Dict, List, Optional, Tuple from urllib.parse import urlparse from prometheus_client import Gauge import requests from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.validators import URLValidator from django.db.models import Q, QuerySet from django.utils.html import escape from swh.scheduler.utils import create_oneshot_task_dict from swh.web.common import archive from swh.web.common.exc import ( BadInputExc, ForbiddenExc, NotFoundExc, sentry_capture_exception, ) -from swh.web.common.models import ( +from swh.web.common.typing import OriginExistenceCheckInfo, SaveOriginRequestInfo +from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc +from swh.web.config import get_config, scheduler +from swh.web.save_code_now.models 
import ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, SAVE_TASK_FAILED, SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_RUNNING, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, VISIT_STATUS_CREATED, VISIT_STATUS_ONGOING, SaveAuthorizedOrigin, SaveOriginRequest, SaveUnauthorizedOrigin, ) -from swh.web.common.typing import OriginExistenceCheckInfo, SaveOriginRequestInfo -from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc -from swh.web.config import get_config, scheduler logger = logging.getLogger(__name__) # Number of days in the past to lookup for information MAX_THRESHOLD_DAYS = 30 # Non terminal visit statuses which needs updates NON_TERMINAL_STATUSES = [ VISIT_STATUS_CREATED, VISIT_STATUS_ONGOING, ] def get_origin_save_authorized_urls() -> List[str]: """ Get the list of origin url prefixes authorized to be immediately loaded into the archive (whitelist). Returns: list: The list of authorized origin url prefix """ return [origin.url for origin in SaveAuthorizedOrigin.objects.all()] def get_origin_save_unauthorized_urls() -> List[str]: """ Get the list of origin url prefixes forbidden to be loaded into the archive (blacklist). Returns: list: the list of unauthorized origin url prefix """ return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] def can_save_origin(origin_url: str, bypass_pending_review: bool = False) -> str: """ Check if a software origin can be saved into the archive. 
Based on the origin url, the save request will be either: * immediately accepted if the url is whitelisted * rejected if the url is blacklisted * put in pending state for manual review otherwise Args: origin_url (str): the software origin url to check Returns: str: the origin save request status, either **accepted**, **rejected** or **pending** """ # origin url may be blacklisted for url_prefix in get_origin_save_unauthorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_REJECTED # if the origin url is in the white list, it can be immediately saved for url_prefix in get_origin_save_authorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_ACCEPTED # otherwise, the origin url needs to be manually verified if the user # that submitted it does not have special permission if bypass_pending_review: # mark the origin URL as trusted in that case SaveAuthorizedOrigin.objects.get_or_create(url=origin_url) return SAVE_REQUEST_ACCEPTED else: return SAVE_REQUEST_PENDING # map visit type to scheduler task # TODO: do not hardcode the task name here (T1157) _visit_type_task = { "git": "load-git", "hg": "load-hg", "svn": "load-svn", "cvs": "load-cvs", "bzr": "load-bzr", } _visit_type_task_privileged = { "archives": "load-archive-files", } # map scheduler task status to origin save status _save_task_status = { "next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED, "next_run_scheduled": SAVE_TASK_SCHEDULED, "completed": SAVE_TASK_SUCCEEDED, "disabled": SAVE_TASK_FAILED, } # map scheduler task_run status to origin save status _save_task_run_status = { "scheduled": SAVE_TASK_SCHEDULED, "started": SAVE_TASK_RUNNING, "eventful": SAVE_TASK_SUCCEEDED, "uneventful": SAVE_TASK_SUCCEEDED, "failed": SAVE_TASK_FAILED, "permfailed": SAVE_TASK_FAILED, "lost": SAVE_TASK_FAILED, } @lru_cache() def get_scheduler_load_task_types() -> List[str]: task_types = scheduler().get_task_types() return [t["type"] for t in task_types if t["type"].startswith("load")] def 
get_savable_visit_types_dict(privileged_user: bool = False) -> Dict: """Returned the supported task types the user has access to. Args: privileged_user: Flag to determine if all visit types should be returned or not. Default to False to only list unprivileged visit types. Returns: the dict of supported visit types for the user """ if privileged_user: task_types = {**_visit_type_task, **_visit_type_task_privileged} else: task_types = _visit_type_task # filter visit types according to scheduler load task types if available try: load_task_types = get_scheduler_load_task_types() return {k: v for k, v in task_types.items() if v in load_task_types} except Exception: return task_types def get_savable_visit_types(privileged_user: bool = False) -> List[str]: """Return the list of visit types the user can perform save requests on. Args: privileged_user: Flag to determine if all visit types should be returned or not. Default to False to only list unprivileged visit types. Returns: the list of saveable visit types """ return sorted(list(get_savable_visit_types_dict(privileged_user).keys())) def _check_visit_type_savable(visit_type: str, privileged_user: bool = False) -> None: visit_type_tasks = get_savable_visit_types(privileged_user) if visit_type not in visit_type_tasks: allowed_visit_types = ", ".join(visit_type_tasks) raise BadInputExc( f"Visit of type {visit_type} can not be saved! " f"Allowed types are the following: {allowed_visit_types}" ) _validate_url = URLValidator( schemes=["http", "https", "svn", "git", "rsync", "pserver", "ssh", "bzr"] ) def _check_origin_url_valid(origin_url: str) -> None: try: _validate_url(origin_url) except ValidationError: raise BadInputExc( f"The provided origin url ({escape(origin_url)}) is not valid!" ) parsed_url = urlparse(origin_url) if parsed_url.password not in (None, "", "anonymous"): raise BadInputExc( "The provided origin url contains a password and cannot be " "accepted for security reasons." 
) def origin_exists(origin_url: str) -> OriginExistenceCheckInfo: """Check the origin url for existence. If it exists, extract some more useful information on the origin. """ resp = requests.head(origin_url, allow_redirects=True) exists = resp.ok content_length: Optional[int] = None last_modified: Optional[str] = None if exists: # Also process X-Archive-Orig-* headers in case the URL targets the # Internet Archive. size_ = resp.headers.get( "Content-Length", resp.headers.get("X-Archive-Orig-Content-Length") ) content_length = int(size_) if size_ else None try: date_str = resp.headers.get( "Last-Modified", resp.headers.get("X-Archive-Orig-Last-Modified", "") ) date = datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %Z") last_modified = date.isoformat() except ValueError: # if not provided or not parsable as per the expected format, keep it None pass return OriginExistenceCheckInfo( origin_url=origin_url, exists=exists, last_modified=last_modified, content_length=content_length, ) def _check_origin_exists(url: str) -> OriginExistenceCheckInfo: """Ensure an URL exists, if not raise an explicit message.""" metadata = origin_exists(url) if not metadata["exists"]: raise BadInputExc(f"The provided url ({escape(url)}) does not exist!") return metadata def _get_visit_info_for_save_request( save_request: SaveOriginRequest, ) -> Tuple[Optional[datetime], Optional[str]]: """Retrieve visit information out of a save request Args: save_request: Input save origin request to retrieve information for. 
Returns: Tuple of (visit date, optional visit status) for such save request origin """ visit_date = None visit_status = None time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # stop trying to find a visit date one month after save request submission # as those requests to storage are expensive and associated loading task # surely ended up with errors if time_delta.days <= MAX_THRESHOLD_DAYS: origin = save_request.origin_url ovs = archive.origin_visit_find_by_date(origin, save_request.request_date) if ovs: visit_date = parse_iso8601_date_to_utc(ovs["date"]) visit_status = ovs["status"] return visit_date, visit_status def _check_visit_update_status( save_request: SaveOriginRequest, ) -> Tuple[Optional[datetime], Optional[str], Optional[str]]: """Given a save request, determine whether a save request was successful or failed. Args: save_request: Input save origin request to retrieve information for. Returns: Tuple of (optional visit date, optional visit status, optional save task status) for such save request origin """ visit_date, visit_status = _get_visit_info_for_save_request(save_request) loading_task_status = None if visit_date and visit_status in ("full", "partial"): # visit has been performed, mark the saving task as succeeded loading_task_status = SAVE_TASK_SUCCEEDED elif visit_status in ("created", "ongoing"): # visit is currently running loading_task_status = SAVE_TASK_RUNNING elif visit_status in ("not_found", "failed"): loading_task_status = SAVE_TASK_FAILED else: time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # consider the task as failed if it is still in scheduled state # 30 days after its submission if time_delta.days > MAX_THRESHOLD_DAYS: loading_task_status = SAVE_TASK_FAILED return visit_date, visit_status, loading_task_status def _compute_task_loading_status( task: Optional[Dict[str, Any]] = None, task_run: Optional[Dict[str, Any]] = None, ) -> Optional[str]: 
loading_task_status: Optional[str] = None # First determine the loading task status out of task information if task: loading_task_status = _save_task_status[task["status"]] if task_run: loading_task_status = _save_task_run_status[task_run["status"]] return loading_task_status def _update_save_request_info( save_request: SaveOriginRequest, task: Optional[Dict[str, Any]] = None, task_run: Optional[Dict[str, Any]] = None, ) -> SaveOriginRequestInfo: """Update save request information out of the visit status and fallback to the task and task_run information if the visit status is missing. Args: save_request: Save request task: Associated scheduler task information about the save request task_run: Most recent run occurrence of the associated task Returns: Summary of the save request information updated. """ must_save = False # To determine the save code now request's final status, the visit date must be set # and the visit status must be a final one. Once they do, the save code now is # definitely done. 
if ( not save_request.visit_date or not save_request.visit_status or save_request.visit_status in NON_TERMINAL_STATUSES ): visit_date, visit_status, loading_task_status = _check_visit_update_status( save_request ) if not loading_task_status: # fallback when not provided loading_task_status = _compute_task_loading_status(task, task_run) if visit_date != save_request.visit_date: must_save = True save_request.visit_date = visit_date if visit_status != save_request.visit_status: must_save = True save_request.visit_status = visit_status if ( loading_task_status is not None and loading_task_status != save_request.loading_task_status ): must_save = True save_request.loading_task_status = loading_task_status if must_save: save_request.save() return save_request.to_dict() def create_save_origin_request( visit_type: str, origin_url: str, privileged_user: bool = False, user_id: Optional[int] = None, **kwargs, ) -> SaveOriginRequestInfo: """Create a loading task to save a software origin into the archive. This function aims to create a software origin loading task through the use of the swh-scheduler component. First, some checks are performed to see if the visit type and origin url are valid but also if the the save request can be accepted. For the 'archives' visit type, this also ensures the artifacts actually exists. If those checks passed, the loading task is then created. Otherwise, the save request is put in pending or rejected state. All the submitted save requests are logged into the swh-web database to keep track of them. Args: visit_type: the type of visit to perform (e.g. git, hg, svn, archives, ...) origin_url: the url of the origin to save privileged: Whether the user has some more privilege than other (bypass review, access to privileged other visit types) user_id: User identifier (provided when authenticated) kwargs: Optional parameters (e.g. 
artifact_url, artifact_filename, artifact_version) Raises: BadInputExc: the visit type or origin url is invalid or inexistent ForbiddenExc: the provided origin url is blacklisted Returns: dict: A dict describing the save request with the following keys: * **visit_type**: the type of visit to perform * **origin_url**: the url of the origin * **save_request_date**: the date the request was submitted * **save_request_status**: the request status, either **accepted**, **rejected** or **pending** * **save_task_status**: the origin loading task status, either **not created**, **not yet scheduled**, **scheduled**, **succeed** or **failed** """ visit_type_tasks = get_savable_visit_types_dict(privileged_user) _check_visit_type_savable(visit_type, privileged_user) _check_origin_url_valid(origin_url) # if all checks passed so far, we can try and save the origin save_request_status = can_save_origin(origin_url, privileged_user) task = None # if the origin save request is accepted, create a scheduler # task to load it into the archive if save_request_status == SAVE_REQUEST_ACCEPTED: # create a task with high priority task_kwargs: Dict[str, Any] = { "priority": "high", "url": origin_url, } if visit_type == "archives": # extra arguments for that type are required archives_data = kwargs.get("archives_data", []) if not archives_data: raise BadInputExc( "Artifacts data are missing for the archives visit type." 
) artifacts = [] for artifact in archives_data: artifact_url = artifact.get("artifact_url") artifact_version = artifact.get("artifact_version") if not artifact_url or not artifact_version: raise BadInputExc("Missing url or version for an artifact to load.") metadata = _check_origin_exists(artifact_url) artifacts.append( { "url": artifact_url, "version": artifact_version, "time": metadata["last_modified"], "length": metadata["content_length"], } ) task_kwargs = dict(**task_kwargs, artifacts=artifacts, snapshot_append=True) sor = None # get list of previously submitted save requests (most recent first) current_sors = list( SaveOriginRequest.objects.filter( visit_type=visit_type, origin_url=origin_url ).order_by("-request_date") ) can_create_task = False # if no save requests previously submitted, create the scheduler task if not current_sors: can_create_task = True else: # get the latest submitted save request sor = current_sors[0] # if it was in pending state, we need to create the scheduler task # and update the save request info in the database if sor.status == SAVE_REQUEST_PENDING: can_create_task = True # a task has already been created to load the origin elif sor.loading_task_id != -1: # get the scheduler task and its status tasks = scheduler().get_tasks([sor.loading_task_id]) task = tasks[0] if tasks else None task_runs = scheduler().get_task_runs([sor.loading_task_id]) task_run = task_runs[0] if task_runs else None save_request_info = _update_save_request_info(sor, task, task_run) task_status = save_request_info["save_task_status"] # create a new scheduler task only if the previous one has been # already executed if ( task_status == SAVE_TASK_FAILED or task_status == SAVE_TASK_SUCCEEDED ): can_create_task = True sor = None else: can_create_task = False if can_create_task: # effectively create the scheduler task task_dict = create_oneshot_task_dict( visit_type_tasks[visit_type], **task_kwargs ) task = scheduler().create_tasks([task_dict])[0] # pending save 
request has been accepted if sor: sor.status = SAVE_REQUEST_ACCEPTED sor.loading_task_id = task["id"] sor.save() else: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, loading_task_id=task["id"], user_ids=f'"{user_id}"' if user_id else None, ) # save request must be manually reviewed for acceptation elif save_request_status == SAVE_REQUEST_PENDING: # check if there is already such a save request already submitted, # no need to add it to the database in that case try: sor = SaveOriginRequest.objects.get( visit_type=visit_type, origin_url=origin_url, status=save_request_status ) user_ids = sor.user_ids if sor.user_ids is not None else "" if user_id is not None and f'"{user_id}"' not in user_ids: # update user ids list sor.user_ids = f'{sor.user_ids},"{user_id}"' sor.save() # if not add it to the database except ObjectDoesNotExist: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, user_ids=f'"{user_id}"' if user_id else None, ) # origin can not be saved as its url is blacklisted, # log the request to the database anyway else: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, user_ids=f'"{user_id}"' if user_id else None, ) if save_request_status == SAVE_REQUEST_REJECTED: raise ForbiddenExc( ( 'The "save code now" request has been rejected ' "because the provided origin url is blacklisted." ) ) assert sor is not None return _update_save_request_info(sor, task) def update_save_origin_requests_from_queryset( requests_queryset: QuerySet, ) -> List[SaveOriginRequestInfo]: """Update all save requests from a SaveOriginRequest queryset, update their status in db and return the list of impacted save_requests. 
Args: requests_queryset: input SaveOriginRequest queryset Returns: list: A list of save origin request info dicts as described in - :func:`swh.web.common.origin_save.create_save_origin_request` + :func:`swh.web.save_code_now.origin_save.create_save_origin_request` """ task_ids = [] for sor in requests_queryset: task_ids.append(sor.loading_task_id) save_requests = [] if task_ids: try: tasks = scheduler().get_tasks(task_ids) tasks = {task["id"]: task for task in tasks} task_runs = scheduler().get_task_runs(tasks) task_runs = {task_run["task"]: task_run for task_run in task_runs} except Exception: # allow to avoid mocking api GET responses for /origin/save endpoint when # running cypress tests as scheduler is not available tasks = {} task_runs = {} for sor in requests_queryset: sr_dict = _update_save_request_info( sor, tasks.get(sor.loading_task_id), task_runs.get(sor.loading_task_id), ) save_requests.append(sr_dict) return save_requests def refresh_save_origin_request_statuses() -> List[SaveOriginRequestInfo]: """Refresh non-terminal save origin requests (SOR) in the backend. Non-terminal SOR are requests whose status is **accepted** and their task status are either **created**, **not yet scheduled**, **scheduled** or **running**. This shall compute this list of SOR, checks their status in the scheduler and optionally elasticsearch for their current status. Then update those in db. Finally, this returns the refreshed information on those SOR. 
""" pivot_date = datetime.now(tz=timezone.utc) - timedelta(days=MAX_THRESHOLD_DAYS) save_requests = SaveOriginRequest.objects.filter( # Retrieve accepted request statuses (all statuses) Q(status=SAVE_REQUEST_ACCEPTED), # those without the required information we need to update Q(visit_date__isnull=True) | Q(visit_status__isnull=True) | Q(visit_status__in=NON_TERMINAL_STATUSES), # limit results to recent ones (that is roughly 30 days old at best) Q(request_date__gte=pivot_date), ) return ( update_save_origin_requests_from_queryset(save_requests) if save_requests.count() > 0 else [] ) def get_save_origin_requests( visit_type: str, origin_url: str ) -> List[SaveOriginRequestInfo]: """ Get all save requests for a given software origin. Args: visit_type: the type of visit origin_url: the url of the origin Raises: BadInputExc: the visit type or origin url is invalid swh.web.common.exc.NotFoundExc: no save requests can be found for the given origin Returns: list: A list of save origin requests dict as described in - :func:`swh.web.common.origin_save.create_save_origin_request` + :func:`swh.web.save_code_now.origin_save.create_save_origin_request` """ _check_visit_type_savable(visit_type) _check_origin_url_valid(origin_url) sors = SaveOriginRequest.objects.filter( visit_type=visit_type, origin_url=origin_url ) if sors.count() == 0: raise NotFoundExc( f"No save requests found for visit of type {visit_type} " f"on origin with url {origin_url}." ) return update_save_origin_requests_from_queryset(sors) def get_save_origin_task_info( save_request_id: int, full_info: bool = True ) -> Dict[str, Any]: """ Get detailed information about an accepted save origin request and its associated loading task. If the associated loading task info is archived and removed from the scheduler database, returns an empty dictionary. 
Args: save_request_id: identifier of a save origin request full_info: whether to return detailed info for staff users Returns: A dictionary with the following keys: - **type**: loading task type - **arguments**: loading task arguments - **id**: loading task database identifier - **backend_id**: loading task celery identifier - **scheduled**: loading task scheduling date - **ended**: loading task termination date - **status**: loading task execution status - **visit_status**: Actual visit status Depending on the availability of the task logs in the elasticsearch cluster of Software Heritage, the returned dictionary may also contain the following keys: - **name**: associated celery task name - **message**: relevant log message from task execution - **duration**: task execution time (only if it succeeded) - **worker**: name of the worker that executed the task """ try: save_request = SaveOriginRequest.objects.get(id=save_request_id) except ObjectDoesNotExist: return {} task_info: Dict[str, Any] = {} if save_request.note is not None: task_info["note"] = save_request.note try: task = scheduler().get_tasks([save_request.loading_task_id]) except Exception: # to avoid mocking GET responses of /save/task/info/ endpoint when running # cypress tests as scheduler is not available in that case task = None task = task[0] if task else None if task is None: return task_info task_run = scheduler().get_task_runs([task["id"]]) task_run = task_run[0] if task_run else None if task_run is None: return task_info task_info.update(task_run) task_info["type"] = task["type"] task_info["arguments"] = task["arguments"] task_info["id"] = task_run["task"] del task_info["task"] del task_info["metadata"] # Enrich the task info with the loading visit status task_info["visit_status"] = save_request.visit_status es_workers_index_url = get_config()["es_workers_index_url"] if not es_workers_index_url: return task_info es_workers_index_url += "/_search" if save_request.visit_date: min_ts = 
save_request.visit_date max_ts = min_ts + timedelta(days=7) else: min_ts = save_request.request_date max_ts = min_ts + timedelta(days=MAX_THRESHOLD_DAYS) min_ts_unix = int(min_ts.timestamp()) * 1000 max_ts_unix = int(max_ts.timestamp()) * 1000 save_task_status = _save_task_status[task["status"]] priority = "3" if save_task_status == SAVE_TASK_FAILED else "6" query = { "bool": { "must": [ {"match_phrase": {"syslog.priority": {"query": priority}}}, { "match_phrase": { "journald.custom.swh_task_id": {"query": task_run["backend_id"]} } }, { "range": { "@timestamp": { "gte": min_ts_unix, "lte": max_ts_unix, "format": "epoch_millis", } } }, ] } } try: response = requests.post( es_workers_index_url, json={"query": query, "sort": ["@timestamp"]}, timeout=30, ) results = json.loads(response.text) if results["hits"]["total"]["value"] >= 1: task_run_info = results["hits"]["hits"][-1]["_source"] journald_custom = task_run_info.get("journald", {}).get("custom", {}) task_info["duration"] = journald_custom.get( "swh_logging_args_runtime", "not available" ) task_info["message"] = task_run_info.get("message", "not available") task_info["name"] = journald_custom.get("swh_task_name", "not available") task_info["worker"] = task_run_info.get("host", {}).get("hostname") except Exception as exc: logger.warning("Request to Elasticsearch failed\n%s", exc) sentry_capture_exception(exc) if not full_info: for field in ("id", "backend_id", "worker"): # remove some staff only fields task_info.pop(field, None) if "message" in task_run and "Loading failure" in task_run["message"]: # hide traceback for non staff users, only display exception message_lines = task_info["message"].split("\n") message = "" for line in message_lines: if line.startswith("Traceback"): break message += f"{line}\n" message += message_lines[-1] task_info["message"] = message return task_info SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests" _submitted_save_requests_gauge = Gauge( 
name=SUBMITTED_SAVE_REQUESTS_METRIC, documentation="Number of submitted origin save requests", labelnames=["status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests" _accepted_save_requests_gauge = Gauge( name=ACCEPTED_SAVE_REQUESTS_METRIC, documentation="Number of accepted origin save requests", labelnames=["load_task_status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) # Metric on the delay of save code now request per status and visit_type. This is the # time difference between the save code now is requested and the time it got ingested. ACCEPTED_SAVE_REQUESTS_DELAY_METRIC = "swh_web_save_requests_delay_seconds" _accepted_save_requests_delay_gauge = Gauge( name=ACCEPTED_SAVE_REQUESTS_DELAY_METRIC, documentation="Save Requests Duration", labelnames=["load_task_status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) def compute_save_requests_metrics() -> None: """Compute Prometheus metrics related to origin save requests: - Number of submitted origin save requests - Number of accepted origin save requests - Save Code Now requests delay between request time and actual time of ingestion """ request_statuses = ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, ) load_task_statuses = ( SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, ) # for metrics, we want access to all visit types visit_types = get_savable_visit_types(privileged_user=True) labels_set = product(request_statuses, visit_types) for labels in labels_set: _submitted_save_requests_gauge.labels(*labels).set(0) labels_set = product(load_task_statuses, visit_types) for labels in labels_set: _accepted_save_requests_gauge.labels(*labels).set(0) duration_load_task_statuses = ( SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED, ) for labels in product(duration_load_task_statuses, visit_types): 
_accepted_save_requests_delay_gauge.labels(*labels).set(0) for sor in SaveOriginRequest.objects.all(): if sor.status == SAVE_REQUEST_ACCEPTED: _accepted_save_requests_gauge.labels( load_task_status=sor.loading_task_status, visit_type=sor.visit_type, ).inc() _submitted_save_requests_gauge.labels( status=sor.status, visit_type=sor.visit_type ).inc() if ( sor.loading_task_status in (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED) and sor.visit_date is not None and sor.request_date is not None ): delay = sor.visit_date.timestamp() - sor.request_date.timestamp() _accepted_save_requests_delay_gauge.labels( load_task_status=sor.loading_task_status, visit_type=sor.visit_type, ).inc(delay) diff --git a/swh/web/templates/admin/origin-save/common.html b/swh/web/save_code_now/templates/admin/origin-save-common.html similarity index 100% rename from swh/web/templates/admin/origin-save/common.html rename to swh/web/save_code_now/templates/admin/origin-save-common.html diff --git a/swh/web/templates/admin/origin-save/filters.html b/swh/web/save_code_now/templates/admin/origin-save-filters.html similarity index 98% rename from swh/web/templates/admin/origin-save/filters.html rename to swh/web/save_code_now/templates/admin/origin-save-filters.html index eb3fc960..c50f0d76 100644 --- a/swh/web/templates/admin/origin-save/filters.html +++ b/swh/web/save_code_now/templates/admin/origin-save-filters.html @@ -1,76 +1,76 @@ -{% extends "./common.html" %} +{% extends "./origin-save-common.html" %} {% comment %} Copyright (C) 2018-2022 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% block tab_content %}
Url
Url
{% endblock %} diff --git a/swh/web/templates/admin/origin-save/requests.html b/swh/web/save_code_now/templates/admin/origin-save-requests.html similarity index 99% rename from swh/web/templates/admin/origin-save/requests.html rename to swh/web/save_code_now/templates/admin/origin-save-requests.html index 8ea2dad1..3a115311 100644 --- a/swh/web/templates/admin/origin-save/requests.html +++ b/swh/web/save_code_now/templates/admin/origin-save-requests.html @@ -1,93 +1,93 @@ -{% extends "./common.html" %} +{% extends "./origin-save-common.html" %} {% comment %} Copyright (C) 2018-2022 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% block tab_content %}
Date Type Url
Date Type Url Status Info
Date Type Url Info
{% endblock %} diff --git a/swh/web/templates/misc/origin-save-help.html b/swh/web/save_code_now/templates/origin-save-help.html similarity index 100% rename from swh/web/templates/misc/origin-save-help.html rename to swh/web/save_code_now/templates/origin-save-help.html diff --git a/swh/web/templates/misc/origin-save-list.html b/swh/web/save_code_now/templates/origin-save-list.html similarity index 100% rename from swh/web/templates/misc/origin-save-list.html rename to swh/web/save_code_now/templates/origin-save-list.html diff --git a/swh/web/templates/misc/origin-save.html b/swh/web/save_code_now/templates/origin-save.html similarity index 99% rename from swh/web/templates/misc/origin-save.html rename to swh/web/save_code_now/templates/origin-save.html index feecc0ec..04ee73f9 100644 --- a/swh/web/templates/misc/origin-save.html +++ b/swh/web/save_code_now/templates/origin-save.html @@ -1,88 +1,88 @@ -{% extends "../layout.html" %} +{% extends "layout.html" %} {% comment %} Copyright (C) 2018-2021 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load render_bundle from webpack_loader %} {% load static %} {% block title %}{{ heading }} – Software Heritage archive{% endblock %} {% block header %} {% render_bundle 'save' %} {% endblock %} {% block navbar-content %}

Save code now

{% endblock %} {% block content %}

You can contribute to extend the content of the Software Heritage archive by submitting an origin save request. To do so, fill the required info in the form below:

{% csrf_token %}
The origin type must be specified
The origin url is not valid or does not reference a code repository
{% block tab_content %} {% endblock %}
{% endblock %} diff --git a/swh/web/save_code_now/urls.py b/swh/web/save_code_now/urls.py new file mode 100644 index 00000000..5703faf1 --- /dev/null +++ b/swh/web/save_code_now/urls.py @@ -0,0 +1,99 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from django.urls import re_path as url + +from swh.web.save_code_now.admin_views import ( + admin_origin_save_add_authorized_url, + admin_origin_save_add_unauthorized_url, + admin_origin_save_authorized_urls_list, + admin_origin_save_filters, + admin_origin_save_remove_authorized_url, + admin_origin_save_remove_unauthorized_url, + admin_origin_save_request_accept, + admin_origin_save_request_reject, + admin_origin_save_request_remove, + admin_origin_save_requests, + admin_origin_save_unauthorized_urls_list, +) + +# register Web API endpoints +import swh.web.save_code_now.api_views # noqa +from swh.web.save_code_now.views import ( + origin_save_help_view, + origin_save_list_view, + origin_save_requests_list, + save_origin_task_info, +) + +urlpatterns = [ + url(r"^save/$", origin_save_help_view, name="origin-save"), + url(r"^save/list/$", origin_save_list_view, name="origin-save-list"), + url( + r"^save/requests/list/(?P.+)/$", + origin_save_requests_list, + name="origin-save-requests-list", + ), + url( + r"^save/task/info/(?P.+)/$", + save_origin_task_info, + name="origin-save-task-info", + ), + url( + r"^admin/origin/save/requests/$", + admin_origin_save_requests, + name="admin-origin-save-requests", + ), + url( + r"^admin/origin/save/filters/$", + admin_origin_save_filters, + name="admin-origin-save-filters", + ), + url( + r"^admin/origin/save/authorized_urls/list/$", + admin_origin_save_authorized_urls_list, + name="admin-origin-save-authorized-urls-list", + ), + url( + 
r"^admin/origin/save/authorized_urls/add/(?P.+)/$", + admin_origin_save_add_authorized_url, + name="admin-origin-save-add-authorized-url", + ), + url( + r"^admin/origin/save/authorized_urls/remove/(?P.+)/$", + admin_origin_save_remove_authorized_url, + name="admin-origin-save-remove-authorized-url", + ), + url( + r"^admin/origin/save/unauthorized_urls/list/$", + admin_origin_save_unauthorized_urls_list, + name="admin-origin-save-unauthorized-urls-list", + ), + url( + r"^admin/origin/save/unauthorized_urls/add/(?P.+)/$", + admin_origin_save_add_unauthorized_url, + name="admin-origin-save-add-unauthorized-url", + ), + url( + r"^admin/origin/save/unauthorized_urls/remove/(?P.+)/$", + admin_origin_save_remove_unauthorized_url, + name="admin-origin-save-remove-unauthorized-url", + ), + url( + r"^admin/origin/save/request/accept/(?P.+)/url/(?P.+)/$", + admin_origin_save_request_accept, + name="admin-origin-save-request-accept", + ), + url( + r"^admin/origin/save/request/reject/(?P.+)/url/(?P.+)/$", + admin_origin_save_request_reject, + name="admin-origin-save-request-reject", + ), + url( + r"^admin/origin/save/request/remove/(?P.+)/$", + admin_origin_save_request_remove, + name="admin-origin-save-request-remove", + ), +] diff --git a/swh/web/misc/origin_save.py b/swh/web/save_code_now/views.py similarity index 78% rename from swh/web/misc/origin_save.py rename to swh/web/save_code_now/views.py index dffa42a6..50b25e8d 100644 --- a/swh/web/misc/origin_save.py +++ b/swh/web/save_code_now/views.py @@ -1,113 +1,97 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information + from django.core.paginator import Paginator from django.db.models import Q from django.http import JsonResponse from django.shortcuts import render -from django.urls import re_path as url from 
swh.web.auth.utils import SWH_AMBASSADOR_PERMISSION, privileged_user -from swh.web.common.models import SaveOriginRequest -from swh.web.common.origin_save import ( +from swh.web.save_code_now.models import SaveOriginRequest +from swh.web.save_code_now.origin_save import ( get_savable_visit_types, get_save_origin_task_info, ) -def _origin_save_help_view(request): +def origin_save_help_view(request): return render( request, - "misc/origin-save-help.html", + "origin-save-help.html", { "heading": ("Request the saving of a software origin into the archive"), "visit_types": get_savable_visit_types( privileged_user(request, permissions=[SWH_AMBASSADOR_PERMISSION]) ), }, ) -def _origin_save_list_view(request): +def origin_save_list_view(request): return render( request, - "misc/origin-save-list.html", + "origin-save-list.html", { "heading": ("Request the saving of a software origin into the archive"), "visit_types": get_savable_visit_types( privileged_user(request, permissions=[SWH_AMBASSADOR_PERMISSION]) ), }, ) -def _origin_save_requests_list(request, status): +def origin_save_requests_list(request, status): if status != "all": save_requests = SaveOriginRequest.objects.filter(status=status) else: save_requests = SaveOriginRequest.objects.all() table_data = {} table_data["recordsTotal"] = save_requests.count() table_data["draw"] = int(request.GET["draw"]) search_value = request.GET["search[value]"] column_order = request.GET["order[0][column]"] field_order = request.GET["columns[%s][name]" % column_order] order_dir = request.GET["order[0][dir]"] if order_dir == "desc": field_order = "-" + field_order save_requests = save_requests.order_by(field_order) length = int(request.GET["length"]) page = int(request.GET["start"]) / length + 1 if search_value: save_requests = save_requests.filter( Q(status__icontains=search_value) | Q(loading_task_status__icontains=search_value) | Q(visit_type__icontains=search_value) | Q(origin_url__icontains=search_value) ) if ( 
int(request.GET.get("user_requests_only", "0")) and request.user.is_authenticated ): save_requests = save_requests.filter(user_ids__contains=f'"{request.user.id}"') table_data["recordsFiltered"] = save_requests.count() paginator = Paginator(save_requests, length) table_data["data"] = [sor.to_dict() for sor in paginator.page(page).object_list] return JsonResponse(table_data) -def _save_origin_task_info(request, save_request_id): +def save_origin_task_info(request, save_request_id): request_info = get_save_origin_task_info( save_request_id, full_info=request.user.is_staff ) for date_field in ("scheduled", "started", "ended"): if date_field in request_info and request_info[date_field] is not None: request_info[date_field] = request_info[date_field].isoformat() return JsonResponse(request_info) - - -urlpatterns = [ - url(r"^save/$", _origin_save_help_view, name="origin-save"), - url(r"^save/list/$", _origin_save_list_view, name="origin-save-list"), - url( - r"^save/requests/list/(?P.+)/$", - _origin_save_requests_list, - name="origin-save-requests-list", - ), - url( - r"^save/task/info/(?P.+)/$", - _save_origin_task_info, - name="origin-save-task-info", - ), -] diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py index e9acf9dc..a91d4644 100644 --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -1,348 +1,349 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django common settings for swh-web. 
""" from importlib.util import find_spec import os import sys from typing import Any, Dict from django.utils import encoding from swh.web.auth.utils import OIDC_SWH_WEB_CLIENT_ID from swh.web.config import get_config # Fix django-js-reverse 0.9.1 compatibility with django 4.x # TODO: Remove that hack once a new django-js-reverse release # is available on PyPI if not hasattr(encoding, "force_text"): setattr(encoding, "force_text", encoding.force_str) swh_web_config = get_config() # Build paths inside the project like this: os.path.join(BASE_DIR, ...) PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = swh_web_config["secret_key"] # SECURITY WARNING: don't run with debug turned on in production! DEBUG = swh_web_config["debug"] DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config["debug"] ALLOWED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config["allowed_hosts"] # Application definition SWH_BASE_DJANGO_APPS = [ "swh.web.auth", "swh.web.browse", "swh.web.common", "swh.web.api", ] SWH_EXTRA_DJANGO_APPS = [ app for app in swh_web_config["swh_extra_django_apps"] if app not in SWH_BASE_DJANGO_APPS ] # swh.web.api must be the last loaded application due to the way # its URLS are registered SWH_DJANGO_APPS = SWH_EXTRA_DJANGO_APPS + SWH_BASE_DJANGO_APPS + INSTALLED_APPS = [ "django.contrib.admin", "django.contrib.auth", "django.contrib.contenttypes", "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", "rest_framework", "webpack_loader", "django_js_reverse", "corsheaders", ] + SWH_DJANGO_APPS MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "corsheaders.middleware.CorsMiddleware", "django.middleware.common.CommonMiddleware", 
"django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "swh.auth.django.middlewares.OIDCSessionExpiredMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", "swh.web.common.middlewares.ThrottlingHeadersMiddleware", "swh.web.common.middlewares.ExceptionMiddleware", ] # Compress all assets (static ones and dynamically generated html) # served by django in a local development environment context. # In a production environment, assets compression will be directly # handled by web servers like apache or nginx. if swh_web_config["serve_assets"]: MIDDLEWARE.insert(0, "django.middleware.gzip.GZipMiddleware") ROOT_URLCONF = "swh.web.urls" SWH_APP_TEMPLATES = [os.path.join(PROJECT_DIR, "../templates")] # Add templates directory from each SWH Django application for app in SWH_DJANGO_APPS: try: app_spec = find_spec(app) assert app_spec is not None, f"Django application {app} not found !" assert app_spec.origin is not None SWH_APP_TEMPLATES.append( os.path.join(os.path.dirname(app_spec.origin), "templates") ) except ModuleNotFoundError: assert False, f"Django application {app} not found !" 
TEMPLATES = [ { "BACKEND": "django.template.backends.django.DjangoTemplates", "DIRS": SWH_APP_TEMPLATES, "APP_DIRS": True, "OPTIONS": { "context_processors": [ "django.template.context_processors.debug", "django.template.context_processors.request", "django.contrib.auth.context_processors.auth", "django.contrib.messages.context_processors.messages", "swh.web.common.utils.context_processor", ], "libraries": { "swh_templatetags": "swh.web.common.swh_templatetags", }, }, }, ] DATABASES = { "default": { "ENGINE": "django.db.backends.sqlite3", "NAME": swh_web_config.get("development_db", ""), } } # Password validation # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa }, { "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", }, { "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", }, { "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", }, ] # Internationalization # https://docs.djangoproject.com/en/1.11/topics/i18n/ LANGUAGE_CODE = "en-us" TIME_ZONE = "UTC" USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ STATIC_URL = "/static/" # static folder location when swh-web has been installed with pip STATIC_DIR = os.path.join(sys.prefix, "share/swh/web/static") if not os.path.exists(STATIC_DIR): # static folder location when developping swh-web STATIC_DIR = os.path.join(PROJECT_DIR, "../../../static") STATICFILES_DIRS = [STATIC_DIR] INTERNAL_IPS = ["127.0.0.1"] throttle_rates = {} http_requests = ["GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"] throttling = swh_web_config["throttling"] for limiter_scope, limiter_conf in throttling["scopes"].items(): if "default" in limiter_conf["limiter_rate"]: throttle_rates[limiter_scope] = 
limiter_conf["limiter_rate"]["default"] # for backward compatibility else: throttle_rates[limiter_scope] = limiter_conf["limiter_rate"] # register sub scopes specific for HTTP request types for http_request in http_requests: if http_request in limiter_conf["limiter_rate"]: throttle_rates[limiter_scope + "_" + http_request.lower()] = limiter_conf[ "limiter_rate" ][http_request] REST_FRAMEWORK: Dict[str, Any] = { "DEFAULT_RENDERER_CLASSES": ( "rest_framework.renderers.JSONRenderer", "swh.web.api.renderers.YAMLRenderer", "rest_framework.renderers.TemplateHTMLRenderer", ), "DEFAULT_THROTTLE_CLASSES": ( "swh.web.api.throttling.SwhWebRateThrottle", "swh.web.api.throttling.SwhWebUserRateThrottle", ), "DEFAULT_THROTTLE_RATES": throttle_rates, "DEFAULT_AUTHENTICATION_CLASSES": [ "rest_framework.authentication.SessionAuthentication", "swh.auth.django.backends.OIDCBearerTokenAuthentication", ], "EXCEPTION_HANDLER": "swh.web.api.apiresponse.error_response_handler", } LOGGING = { "version": 1, "disable_existing_loggers": False, "filters": { "require_debug_false": { "()": "django.utils.log.RequireDebugFalse", }, "require_debug_true": { "()": "django.utils.log.RequireDebugTrue", }, }, "formatters": { "request": { "format": "[%(asctime)s] [%(levelname)s] %(request)s %(status_code)s", "datefmt": "%d/%b/%Y %H:%M:%S", }, "simple": { "format": "[%(asctime)s] [%(levelname)s] %(message)s", "datefmt": "%d/%b/%Y %H:%M:%S", }, "verbose": { "format": ( "[%(asctime)s] [%(levelname)s] %(name)s.%(funcName)s:%(lineno)s " "- %(message)s" ), "datefmt": "%d/%b/%Y %H:%M:%S", }, }, "handlers": { "console": { "level": "DEBUG", "filters": ["require_debug_true"], "class": "logging.StreamHandler", "formatter": "simple", }, "file": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "simple", }, "file_request": { "level": "WARNING", "filters": ["require_debug_false"], "class": 
"logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "request", }, "console_verbose": { "level": "DEBUG", "filters": ["require_debug_true"], "class": "logging.StreamHandler", "formatter": "verbose", }, "file_verbose": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "verbose", }, "null": { "class": "logging.NullHandler", }, }, "loggers": { "": { "handlers": ["console_verbose", "file_verbose"], "level": "DEBUG" if DEBUG else "WARNING", }, "django": { "handlers": ["console"], "level": "DEBUG" if DEBUG else "WARNING", "propagate": False, }, "django.request": { "handlers": ["file_request"], "level": "DEBUG" if DEBUG else "WARNING", "propagate": False, }, "django.db.backends": {"handlers": ["null"], "propagate": False}, "django.utils.autoreload": { "level": "INFO", }, "swh.core.statsd": { "level": "INFO", }, "urllib3": { "level": "INFO", }, }, } WEBPACK_LOADER = { "DEFAULT": { "CACHE": False, "BUNDLE_DIR_NAME": "./", "STATS_FILE": os.path.join(STATIC_DIR, "webpack-stats.json"), "POLL_INTERVAL": 0.1, "TIMEOUT": None, "IGNORE": [".+\\.hot-update.js", ".+\\.map"], } } LOGIN_URL = "/admin/login/" LOGIN_REDIRECT_URL = "admin" SESSION_ENGINE = "django.contrib.sessions.backends.cache" CACHES = { "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}, } JS_REVERSE_JS_MINIFY = False CORS_ORIGIN_ALLOW_ALL = True CORS_URLS_REGEX = r"^/(badge|api)/.*$" AUTHENTICATION_BACKENDS = [ "django.contrib.auth.backends.ModelBackend", "swh.auth.django.backends.OIDCAuthorizationCodePKCEBackend", ] SWH_AUTH_SERVER_URL = swh_web_config["keycloak"]["server_url"] SWH_AUTH_REALM_NAME = swh_web_config["keycloak"]["realm_name"] SWH_AUTH_CLIENT_ID = OIDC_SWH_WEB_CLIENT_ID SWH_AUTH_SESSION_EXPIRED_REDIRECT_VIEW = "logout" DEFAULT_AUTO_FIELD = "django.db.models.AutoField" diff --git 
a/swh/web/templates/includes/top-navigation.html b/swh/web/templates/includes/top-navigation.html index 0067abc5..987216e9 100644 --- a/swh/web/templates/includes/top-navigation.html +++ b/swh/web/templates/includes/top-navigation.html @@ -1,153 +1,155 @@ {% comment %} Copyright (C) 2017-2020 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load swh_templatetags %}
{% if snapshot_context %} {% if snapshot_context.branch or snapshot_context.release or snapshot_context.revision_id %} {% endif %} {% endif %}
{% include "includes/breadcrumbs.html" %}
{% if top_right_link %} {% if top_right_link.icon %} {% endif %} {{ top_right_link.text }} {% endif %} {% if available_languages %} {% endif %} {% if show_actions %} {% if not snapshot_context or not snapshot_context.is_empty %} {% include "includes/vault-create-tasks.html" %} {% endif %} - {% include "includes/take-new-snapshot.html" %} + {% if "swh.web.save_code_now" in SWH_DJANGO_APPS %} + {% include "includes/take-new-snapshot.html" %} + {% endif %} {% include "includes/show-metadata.html" %} {% endif %}
{% include "includes/show-swhids.html" %} diff --git a/swh/web/templates/layout.html b/swh/web/templates/layout.html index 716708f9..86f89492 100644 --- a/swh/web/templates/layout.html +++ b/swh/web/templates/layout.html @@ -1,313 +1,315 @@ {% comment %} Copyright (C) 2015-2022 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load js_reverse %} {% load static %} {% load render_bundle from webpack_loader %} {% load swh_templatetags %} {% block title %}{% endblock %} {% render_bundle 'vendors' %} {% render_bundle 'webapp' %} {% render_bundle 'guided_tour' %} {{ request.user.is_authenticated|json_script:"swh_user_logged_in" }} {% include "includes/favicon.html" %} {% block header %}{% endblock %} {% if swh_web_prod %} {% endif %}
{% include "misc/hiring-banner.html" %}
{% if swh_web_staging %}
Staging
v{{ swh_web_version }}
{% elif swh_web_dev %}
Development
v{{ swh_web_version|split:"+"|first }}
{% endif %} {% block content %}{% endblock %}
{% include "includes/global-modals.html" %}
back to top
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py index ca320b27..3097f4a2 100644 --- a/swh/web/tests/conftest.py +++ b/swh/web/tests/conftest.py @@ -1,1240 +1,1240 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict from datetime import timedelta import functools from importlib import import_module, reload import json import os import random import shutil import sys import time from typing import Any, Dict, List, Optional from _pytest.python import Function from hypothesis import HealthCheck from hypothesis import settings as hypothesis_settings import pytest from pytest_django.fixtures import SettingsWrapper from django.contrib.auth.models import User from django.core.cache import cache from django.test.utils import setup_databases from django.urls import clear_url_caches from rest_framework.test import APIClient, APIRequestFactory from swh.model.hashutil import ( ALGORITHMS, DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex, ) from swh.model.model import Content, Directory from swh.model.swhids import CoreSWHID, ObjectType from swh.scheduler.tests.common import TASK_TYPES from swh.storage.algos.origin import origin_get_latest_visit_status from swh.storage.algos.revisions_walker import get_revisions_walker from swh.storage.algos.snapshot import snapshot_get_all_branches, snapshot_get_latest from swh.web.auth.utils import ( ADD_FORGE_MODERATOR_PERMISSION, MAILMAP_ADMIN_PERMISSION, MAILMAP_PERMISSION, OIDC_SWH_WEB_CLIENT_ID, ) from swh.web.common import converters -from swh.web.common.origin_save import get_scheduler_load_task_types from swh.web.common.typing import OriginVisitInfo from swh.web.common.utils import browsers_supported_image_mimes from swh.web.config import get_config +from 
swh.web.save_code_now.origin_save import get_scheduler_load_task_types from swh.web.tests.data import ( get_tests_data, override_storages, random_content, random_sha1, random_sha1_bytes, random_sha256, ) from swh.web.tests.utils import create_django_permission os.environ["LC_ALL"] = "C.UTF-8" fossology_missing = shutil.which("nomossa") is None # Register some hypothesis profiles hypothesis_settings.register_profile("default", hypothesis_settings()) # we use getattr here to keep mypy happy regardless hypothesis version function_scoped_fixture_check = ( [getattr(HealthCheck, "function_scoped_fixture")] if hasattr(HealthCheck, "function_scoped_fixture") else [] ) suppress_health_check = [ HealthCheck.too_slow, HealthCheck.filter_too_much, ] + function_scoped_fixture_check hypothesis_settings.register_profile( "swh-web", hypothesis_settings( deadline=None, suppress_health_check=suppress_health_check, ), ) hypothesis_settings.register_profile( "swh-web-fast", hypothesis_settings( deadline=None, max_examples=5, suppress_health_check=suppress_health_check, ), ) def pytest_addoption(parser): parser.addoption("--swh-web-random-seed", action="store", default=None) def pytest_configure(config): # Use fast hypothesis profile by default if none has been # explicitly specified in pytest option if config.getoption("--hypothesis-profile") is None: hypothesis_settings.load_profile("swh-web-fast") # Small hack in order to be able to run the unit tests # without static assets generated by webpack. # Those assets are not really needed for the Python tests # but the django templates will fail to load due to missing # generated file webpack-stats.json describing the js and css # files to include. # So generate a dummy webpack-stats.json file to overcome # that issue. 
test_dir = os.path.dirname(__file__) # location of the static folder when running tests through tox data_dir = os.path.join(sys.prefix, "share/swh/web") static_dir = os.path.join(data_dir, "static") if not os.path.exists(static_dir): # location of the static folder when running tests locally with pytest static_dir = os.path.join(test_dir, "../../../static") webpack_stats = os.path.join(static_dir, "webpack-stats.json") if os.path.exists(webpack_stats): return bundles_dir = os.path.join(test_dir, "../../../assets/src/bundles") if not os.path.exists(bundles_dir): # location of the bundles folder when running tests with tox bundles_dir = os.path.join(data_dir, "assets/src/bundles") _, bundles, _ = next(os.walk(bundles_dir)) mock_webpack_stats = { "status": "done", "publicPath": "/static", "chunks": {}, "assets": {}, } for bundle in bundles: asset = f"js/{bundle}.js" mock_webpack_stats["chunks"][bundle] = [asset] mock_webpack_stats["assets"][asset] = { "name": asset, "publicPath": f"/static/{asset}", } with open(webpack_stats, "w") as outfile: json.dump(mock_webpack_stats, outfile) _swh_web_custom_section = "swh-web custom section" _random_seed_cache_key = "swh-web/random-seed" @pytest.fixture(scope="function", autouse=True) def random_seed(pytestconfig): state = random.getstate() seed = pytestconfig.getoption("--swh-web-random-seed") if seed is None: seed = time.time() seed = int(seed) cache.set(_random_seed_cache_key, seed) random.seed(seed) yield seed random.setstate(state) def pytest_report_teststatus(report, *args): if report.when == "call" and report.outcome == "failed": seed = cache.get(_random_seed_cache_key, None) line = ( f'FAILED {report.nodeid}: Use "pytest --swh-web-random-seed={seed} ' f'{report.nodeid}" to reproduce that test failure with same inputs' ) report.sections.append((_swh_web_custom_section, line)) def pytest_terminal_summary(terminalreporter, *args): reports = terminalreporter.getreports("failed") content = os.linesep.join( text for report in 
reports for secname, text in report.sections if secname == _swh_web_custom_section ) if content: terminalreporter.ensure_newline() terminalreporter.section(_swh_web_custom_section, sep="-", blue=True, bold=True) terminalreporter.line(content) # Clear Django cache before each test @pytest.fixture(autouse=True) def django_cache_cleared(): cache.clear() # Alias rf fixture from pytest-django @pytest.fixture def request_factory(rf): return rf # Fixture to get test client from Django REST Framework @pytest.fixture def api_client(): return APIClient() # Fixture to get API request factory from Django REST Framework @pytest.fixture def api_request_factory(): return APIRequestFactory() # Initialize tests data @pytest.fixture(scope="function", autouse=True) def tests_data(): data = get_tests_data(reset=True) # Update swh-web configuration to use the in-memory storages # instantiated in the tests.data module override_storages( data["storage"], data["idx_storage"], data["search"], data["counters"] ) return data @pytest.fixture(scope="function") def sha1(): """Fixture returning a valid hexadecimal sha1 value.""" return random_sha1() @pytest.fixture(scope="function") def invalid_sha1(): """Fixture returning an invalid sha1 representation.""" return hash_to_hex(bytes(random.randint(0, 255) for _ in range(50))) @pytest.fixture(scope="function") def sha256(): """Fixture returning a valid hexadecimal sha256 value.""" return random_sha256() def _known_swh_objects(tests_data, object_type): return tests_data[object_type] @pytest.fixture(scope="function") def content(tests_data): """Fixture returning a random content ingested into the test archive.""" return random.choice(_known_swh_objects(tests_data, "contents")) @pytest.fixture(scope="function") def contents(tests_data): """Fixture returning random contents ingested into the test archive.""" return random.choices( _known_swh_objects(tests_data, "contents"), k=random.randint(2, 8) ) def _new_content(tests_data): while True: new_content 
= random_content() sha1_bytes = hash_to_bytes(new_content["sha1"]) if tests_data["storage"].content_get_data(sha1_bytes) is None: return new_content @pytest.fixture(scope="function") def unknown_content(tests_data): """Fixture returning a random content not ingested into the test archive.""" return _new_content(tests_data) @pytest.fixture(scope="function") def unknown_contents(tests_data): """Fixture returning random contents not ingested into the test archive.""" new_contents = [] new_content_ids = set() nb_contents = random.randint(2, 8) while len(new_contents) != nb_contents: new_content = _new_content(tests_data) if new_content["sha1"] not in new_content_ids: new_contents.append(new_content) new_content_ids.add(new_content["sha1"]) return list(new_contents) @pytest.fixture(scope="function") def empty_content(): """Fixture returning the empty content ingested into the test archive.""" empty_content = Content.from_data(data=b"").to_dict() for algo in DEFAULT_ALGORITHMS: empty_content[algo] = hash_to_hex(empty_content[algo]) return empty_content @functools.lru_cache(maxsize=None) def _content_text(): return list( filter( lambda c: c["mimetype"].startswith("text/"), _known_swh_objects(get_tests_data(), "contents"), ) ) @pytest.fixture(scope="function") def content_text(): """ Fixture returning a random textual content ingested into the test archive. """ return random.choice(_content_text()) @functools.lru_cache(maxsize=None) def _content_text_non_utf8(): return list( filter( lambda c: c["mimetype"].startswith("text/") and c["encoding"] not in ("utf-8", "us-ascii"), _known_swh_objects(get_tests_data(), "contents"), ) ) @pytest.fixture(scope="function") def content_text_non_utf8(): """Fixture returning a random textual content not encoded to UTF-8 ingested into the test archive. 
""" return random.choice(_content_text_non_utf8()) @functools.lru_cache(maxsize=None) def _content_application_no_highlight(): return list( filter( lambda c: c["mimetype"].startswith("application/") and c["hljs_language"] == "plaintext", _known_swh_objects(get_tests_data(), "contents"), ) ) @pytest.fixture(scope="function") def content_application_no_highlight(): """Fixture returning a random textual content with mimetype starting with application/ and no detected programming language to highlight ingested into the test archive. """ return random.choice(_content_application_no_highlight()) @functools.lru_cache(maxsize=None) def _content_text_no_highlight(): return list( filter( lambda c: c["mimetype"].startswith("text/") and c["hljs_language"] == "plaintext", _known_swh_objects(get_tests_data(), "contents"), ) ) @pytest.fixture(scope="function") def content_text_no_highlight(): """Fixture returning a random textual content with no detected programming language to highlight ingested into the test archive. """ return random.choice(_content_text_no_highlight()) @functools.lru_cache(maxsize=None) def _content_image_type(): return list( filter( lambda c: c["mimetype"] in browsers_supported_image_mimes, _known_swh_objects(get_tests_data(), "contents"), ) ) @pytest.fixture(scope="function") def content_image_type(): """Fixture returning a random image content ingested into the test archive.""" return random.choice(_content_image_type()) @functools.lru_cache(maxsize=None) def _content_unsupported_image_type_rendering(): return list( filter( lambda c: c["mimetype"].startswith("image/") and c["mimetype"] not in browsers_supported_image_mimes, _known_swh_objects(get_tests_data(), "contents"), ) ) @pytest.fixture(scope="function") def content_unsupported_image_type_rendering(): """Fixture returning a random image content ingested into the test archive that can not be rendered by browsers. 
""" return random.choice(_content_unsupported_image_type_rendering()) @functools.lru_cache(maxsize=None) def _content_utf8_detected_as_binary(): def utf8_binary_detected(content): if content["encoding"] != "binary": return False try: content["raw_data"].decode("utf-8") except Exception: return False else: return True return list( filter(utf8_binary_detected, _known_swh_objects(get_tests_data(), "contents")) ) @pytest.fixture(scope="function") def content_utf8_detected_as_binary(): """Fixture returning a random textual content detected as binary by libmagic while they are valid UTF-8 encoded files. """ return random.choice(_content_utf8_detected_as_binary()) @pytest.fixture(scope="function") def directory(tests_data): """Fixture returning a random directory ingested into the test archive.""" return random.choice(_known_swh_objects(tests_data, "directories")) @functools.lru_cache(maxsize=None) def _directory_with_entry_type(type_): tests_data = get_tests_data() return list( filter( lambda d: any( [ e["type"] == type_ for e in list(tests_data["storage"].directory_ls(hash_to_bytes(d))) ] ), _known_swh_objects(tests_data, "directories"), ) ) @pytest.fixture(scope="function") def directory_with_subdirs(): """Fixture returning a random directory containing sub directories ingested into the test archive. 
""" return random.choice(_directory_with_entry_type("dir")) @pytest.fixture(scope="function") def directory_with_files(): """Fixture returning a random directory containing at least one regular file.""" return random.choice(_directory_with_entry_type("file")) @pytest.fixture(scope="function") def unknown_directory(tests_data): """Fixture returning a random directory not ingested into the test archive.""" while True: new_directory = random_sha1() sha1_bytes = hash_to_bytes(new_directory) if list(tests_data["storage"].directory_missing([sha1_bytes])): return new_directory @pytest.fixture(scope="function") def empty_directory(): """Fixture returning the empty directory ingested into the test archive.""" return Directory(entries=()).id.hex() @pytest.fixture(scope="function") def revision(tests_data): """Fixturereturning a random revision ingested into the test archive.""" return random.choice(_known_swh_objects(tests_data, "revisions")) @pytest.fixture(scope="function") def revisions(tests_data): """Fixture returning random revisions ingested into the test archive.""" return random.choices( _known_swh_objects(tests_data, "revisions"), k=random.randint(2, 8), ) @pytest.fixture(scope="function") def revisions_list(tests_data): """Fixture returning random revisions ingested into the test archive.""" def gen_revisions_list(size): return random.choices( _known_swh_objects(tests_data, "revisions"), k=size, ) return gen_revisions_list @pytest.fixture(scope="function") def unknown_revision(tests_data): """Fixture returning a random revision not ingested into the test archive.""" while True: new_revision = random_sha1() sha1_bytes = hash_to_bytes(new_revision) if tests_data["storage"].revision_get([sha1_bytes])[0] is None: return new_revision def _get_origin_dfs_revisions_walker(tests_data): storage = tests_data["storage"] origin = random.choice(tests_data["origins"][:-1]) snapshot = snapshot_get_latest(storage, origin["url"]) if snapshot.branches[b"HEAD"].target_type.value == 
"alias": target = snapshot.branches[b"HEAD"].target head = snapshot.branches[target].target else: head = snapshot.branches[b"HEAD"].target return get_revisions_walker("dfs", storage, head) @functools.lru_cache(maxsize=None) def _ancestor_revisions_data(): # get a dfs revisions walker for one of the origins # loaded into the test archive revisions_walker = _get_origin_dfs_revisions_walker(get_tests_data()) master_revisions = [] children = defaultdict(list) init_rev_found = False # get revisions only authored in the master branch for rev in revisions_walker: for rev_p in rev["parents"]: children[rev_p].append(rev["id"]) if not init_rev_found: master_revisions.append(rev) if not rev["parents"]: init_rev_found = True return master_revisions, children @pytest.fixture(scope="function") def ancestor_revisions(): """Fixture returning a pair of revisions ingested into the test archive with an ancestor relation. """ master_revisions, children = _ancestor_revisions_data() # head revision root_rev = master_revisions[0] # pick a random revision, different from head, only authored # in the master branch ancestor_rev_idx = random.choice(list(range(1, len(master_revisions) - 1))) ancestor_rev = master_revisions[ancestor_rev_idx] ancestor_child_revs = children[ancestor_rev["id"]] return { "sha1_git_root": hash_to_hex(root_rev["id"]), "sha1_git": hash_to_hex(ancestor_rev["id"]), "children": [hash_to_hex(r) for r in ancestor_child_revs], } @functools.lru_cache(maxsize=None) def _non_ancestor_revisions_data(): # get a dfs revisions walker for one of the origins # loaded into the test archive revisions_walker = _get_origin_dfs_revisions_walker(get_tests_data()) merge_revs = [] children = defaultdict(list) # get all merge revisions for rev in revisions_walker: if len(rev["parents"]) > 1: merge_revs.append(rev) for rev_p in rev["parents"]: children[rev_p].append(rev["id"]) return merge_revs, children @pytest.fixture(scope="function") def non_ancestor_revisions(): """Fixture returning a 
pair of revisions ingested into the test archive with no ancestor relation. """ merge_revs, children = _non_ancestor_revisions_data() # find a merge revisions whose parents have a unique child revision random.shuffle(merge_revs) selected_revs = None for merge_rev in merge_revs: if all(len(children[rev_p]) == 1 for rev_p in merge_rev["parents"]): selected_revs = merge_rev["parents"] return { "sha1_git_root": hash_to_hex(selected_revs[0]), "sha1_git": hash_to_hex(selected_revs[1]), } @pytest.fixture(scope="function") def revision_with_submodules(): """Fixture returning a revision that is known to point to a directory with revision entries (aka git submodules) """ return { "rev_sha1_git": "ffcb69001f3f6745dfd5b48f72ab6addb560e234", "rev_dir_sha1_git": "d92a21446387fa28410e5a74379c934298f39ae2", "rev_dir_rev_path": "libtess2", } @pytest.fixture(scope="function") def release(tests_data): """Fixture returning a random release ingested into the test archive.""" return random.choice(_known_swh_objects(tests_data, "releases")) @pytest.fixture(scope="function") def releases(tests_data): """Fixture returning random releases ingested into the test archive.""" return random.choices( _known_swh_objects(tests_data, "releases"), k=random.randint(2, 8) ) @pytest.fixture(scope="function") def unknown_release(tests_data): """Fixture returning a random release not ingested into the test archive.""" while True: new_release = random_sha1() sha1_bytes = hash_to_bytes(new_release) if tests_data["storage"].release_get([sha1_bytes])[0] is None: return new_release @pytest.fixture(scope="function") def snapshot(tests_data): """Fixture returning a random snapshot ingested into the test archive.""" return random.choice(_known_swh_objects(tests_data, "snapshots")) @pytest.fixture(scope="function") def unknown_snapshot(tests_data): """Fixture returning a random snapshot not ingested into the test archive.""" while True: new_snapshot = random_sha1() sha1_bytes = hash_to_bytes(new_snapshot) if 
tests_data["storage"].snapshot_get_branches(sha1_bytes) is None: return new_snapshot @pytest.fixture(scope="function") def origin(tests_data): """Fixture returning a random origin ingested into the test archive.""" return random.choice(_known_swh_objects(tests_data, "origins")) @functools.lru_cache(maxsize=None) def _origin_with_multiple_visits(): tests_data = get_tests_data() origins = [] storage = tests_data["storage"] for origin in tests_data["origins"]: visit_page = storage.origin_visit_get(origin["url"]) if len(visit_page.results) > 1: origins.append(origin) return origins @pytest.fixture(scope="function") def origin_with_multiple_visits(): """Fixture returning a random origin with multiple visits ingested into the test archive. """ return random.choice(_origin_with_multiple_visits()) @functools.lru_cache(maxsize=None) def _origin_with_releases(): tests_data = get_tests_data() origins = [] for origin in tests_data["origins"]: snapshot = snapshot_get_latest(tests_data["storage"], origin["url"]) if any([b.target_type.value == "release" for b in snapshot.branches.values()]): origins.append(origin) return origins @pytest.fixture(scope="function") def origin_with_releases(): """Fixture returning a random origin with releases ingested into the test archive.""" return random.choice(_origin_with_releases()) @functools.lru_cache(maxsize=None) def _origin_with_pull_request_branches(): tests_data = get_tests_data() origins = [] storage = tests_data["storage"] for origin in storage.origin_list(limit=1000).results: snapshot = snapshot_get_latest(storage, origin.url) if any([b"refs/pull/" in b for b in snapshot.branches]): origins.append(origin) return origins @pytest.fixture(scope="function") def origin_with_pull_request_branches(): """Fixture returning a random origin with pull request branches ingested into the test archive. 
""" return random.choice(_origin_with_pull_request_branches()) @functools.lru_cache(maxsize=None) def _object_type_swhid(object_type): return list( filter( lambda swhid: swhid.object_type == object_type, _known_swh_objects(get_tests_data(), "swhids"), ) ) @pytest.fixture(scope="function") def content_swhid(): """Fixture returning a qualified SWHID for a random content object ingested into the test archive. """ return random.choice(_object_type_swhid(ObjectType.CONTENT)) @pytest.fixture(scope="function") def directory_swhid(): """Fixture returning a qualified SWHID for a random directory object ingested into the test archive. """ return random.choice(_object_type_swhid(ObjectType.DIRECTORY)) @pytest.fixture(scope="function") def release_swhid(): """Fixture returning a qualified SWHID for a random release object ingested into the test archive. """ return random.choice(_object_type_swhid(ObjectType.RELEASE)) @pytest.fixture(scope="function") def revision_swhid(): """Fixture returning a qualified SWHID for a random revision object ingested into the test archive. """ return random.choice(_object_type_swhid(ObjectType.REVISION)) @pytest.fixture(scope="function") def snapshot_swhid(): """Fixture returning a qualified SWHID for a snapshot object ingested into the test archive. """ return random.choice(_object_type_swhid(ObjectType.SNAPSHOT)) @pytest.fixture(scope="function", params=list(ObjectType)) def unknown_core_swhid(request) -> CoreSWHID: """Fixture returning an unknown core SWHID. Tests using this will be called once per object type. 
""" return CoreSWHID( object_type=request.param, object_id=random_sha1_bytes(), ) # Fixture to manipulate data from a sample archive used in the tests @pytest.fixture(scope="function") def archive_data(tests_data): return _ArchiveData(tests_data) # Fixture to manipulate indexer data from a sample archive used in the tests @pytest.fixture(scope="function") def indexer_data(tests_data): return _IndexerData(tests_data) # Custom data directory for requests_mock @pytest.fixture def datadir(): return os.path.join(os.path.abspath(os.path.dirname(__file__)), "resources") class _ArchiveData: """ Helper class to manage data from a sample test archive. It is initialized with a reference to an in-memory storage containing raw tests data. It is basically a proxy to Storage interface but it overrides some methods to retrieve those tests data in a json serializable format in order to ease tests implementation. """ def __init__(self, tests_data): self.storage = tests_data["storage"] def __getattr__(self, key): if key == "storage": raise AttributeError(key) # Forward calls to non overridden Storage methods to wrapped # storage instance return getattr(self.storage, key) def content_find(self, content: Dict[str, Any]) -> Dict[str, Any]: cnt_ids_bytes = { algo_hash: hash_to_bytes(content[algo_hash]) for algo_hash in ALGORITHMS if content.get(algo_hash) } cnt = self.storage.content_find(cnt_ids_bytes) return converters.from_content(cnt[0].to_dict()) if cnt else cnt def content_get(self, cnt_id: str) -> Dict[str, Any]: cnt_id_bytes = hash_to_bytes(cnt_id) content = self.storage.content_get([cnt_id_bytes])[0] if content: content_d = content.to_dict() content_d.pop("ctime", None) else: content_d = None return converters.from_swh( content_d, hashess={"sha1", "sha1_git", "sha256", "blake2s256"} ) def content_get_data(self, cnt_id: str) -> Optional[Dict[str, Any]]: cnt_id_bytes = hash_to_bytes(cnt_id) cnt_data = self.storage.content_get_data(cnt_id_bytes) if cnt_data is None: return None 
return converters.from_content({"data": cnt_data, "sha1": cnt_id_bytes}) def directory_get(self, dir_id): return {"id": dir_id, "content": self.directory_ls(dir_id)} def directory_ls(self, dir_id): cnt_id_bytes = hash_to_bytes(dir_id) dir_content = map( converters.from_directory_entry, self.storage.directory_ls(cnt_id_bytes) ) return list(dir_content) def release_get(self, rel_id: str) -> Optional[Dict[str, Any]]: rel_id_bytes = hash_to_bytes(rel_id) rel_data = self.storage.release_get([rel_id_bytes])[0] return converters.from_release(rel_data) if rel_data else None def revision_get(self, rev_id: str) -> Optional[Dict[str, Any]]: rev_id_bytes = hash_to_bytes(rev_id) rev_data = self.storage.revision_get([rev_id_bytes])[0] return converters.from_revision(rev_data) if rev_data else None def revision_log(self, rev_id, limit=None): rev_id_bytes = hash_to_bytes(rev_id) return list( map( converters.from_revision, self.storage.revision_log([rev_id_bytes], limit=limit), ) ) def snapshot_get_latest(self, origin_url): snp = snapshot_get_latest(self.storage, origin_url) return converters.from_snapshot(snp.to_dict()) def origin_get(self, origin_urls): origins = self.storage.origin_get(origin_urls) return [converters.from_origin(o.to_dict()) for o in origins] def origin_visit_get(self, origin_url): next_page_token = None visits = [] while True: visit_page = self.storage.origin_visit_get( origin_url, page_token=next_page_token ) next_page_token = visit_page.next_page_token for visit in visit_page.results: visit_status = self.storage.origin_visit_status_get_latest( origin_url, visit.visit ) visits.append( converters.from_origin_visit( {**visit_status.to_dict(), "type": visit.type} ) ) if not next_page_token: break return visits def origin_visit_get_by(self, origin_url: str, visit_id: int) -> OriginVisitInfo: visit = self.storage.origin_visit_get_by(origin_url, visit_id) assert visit is not None visit_status = self.storage.origin_visit_status_get_latest(origin_url, visit_id) assert 
visit_status is not None return converters.from_origin_visit( {**visit_status.to_dict(), "type": visit.type} ) def origin_visit_status_get_latest( self, origin_url, type: Optional[str] = None, allowed_statuses: Optional[List[str]] = None, require_snapshot: bool = False, ): visit_status = origin_get_latest_visit_status( self.storage, origin_url, type=type, allowed_statuses=allowed_statuses, require_snapshot=require_snapshot, ) return ( converters.from_origin_visit(visit_status.to_dict()) if visit_status else None ) def snapshot_get(self, snapshot_id): snp = snapshot_get_all_branches(self.storage, hash_to_bytes(snapshot_id)) return converters.from_snapshot(snp.to_dict()) def snapshot_get_branches( self, snapshot_id, branches_from="", branches_count=1000, target_types=None ): partial_branches = self.storage.snapshot_get_branches( hash_to_bytes(snapshot_id), branches_from.encode(), branches_count, target_types, ) return converters.from_partial_branches(partial_branches) def snapshot_get_head(self, snapshot): if snapshot["branches"]["HEAD"]["target_type"] == "alias": target = snapshot["branches"]["HEAD"]["target"] head = snapshot["branches"][target]["target"] else: head = snapshot["branches"]["HEAD"]["target"] return head def snapshot_count_branches(self, snapshot_id): counts = dict.fromkeys(("alias", "release", "revision"), 0) counts.update(self.storage.snapshot_count_branches(hash_to_bytes(snapshot_id))) counts.pop(None, None) return counts class _IndexerData: """ Helper class to manage indexer tests data It is initialized with a reference to an in-memory indexer storage containing raw tests data. It also defines class methods to retrieve those tests data in a json serializable format in order to ease tests implementation. 
""" def __init__(self, tests_data): self.idx_storage = tests_data["idx_storage"] self.mimetype_indexer = tests_data["mimetype_indexer"] self.license_indexer = tests_data["license_indexer"] def content_add_mimetype(self, cnt_id): self.mimetype_indexer.run([hash_to_bytes(cnt_id)]) def content_get_mimetype(self, cnt_id): mimetype = self.idx_storage.content_mimetype_get([hash_to_bytes(cnt_id)])[ 0 ].to_dict() return converters.from_filetype(mimetype) def content_add_license(self, cnt_id): self.license_indexer.run([hash_to_bytes(cnt_id)]) def content_get_license(self, cnt_id): cnt_id_bytes = hash_to_bytes(cnt_id) licenses = self.idx_storage.content_fossology_license_get([cnt_id_bytes]) for license in licenses: yield converters.from_swh(license.to_dict(), hashess={"id"}) @pytest.fixture def keycloak_oidc(keycloak_oidc, mocker): keycloak_config = get_config()["keycloak"] keycloak_oidc.server_url = keycloak_config["server_url"] keycloak_oidc.realm_name = keycloak_config["realm_name"] keycloak_oidc.client_id = OIDC_SWH_WEB_CLIENT_ID keycloak_oidc_client = mocker.patch("swh.web.auth.views.keycloak_oidc_client") keycloak_oidc_client.return_value = keycloak_oidc return keycloak_oidc @pytest.fixture def subtest(request): """A hack to explicitly set up and tear down fixtures. This fixture allows you to set up and tear down fixtures within the test function itself. This is useful (necessary!) for using Hypothesis inside pytest, as hypothesis will call the test function multiple times, without setting up or tearing down fixture state as it is normally the case. Copied from the pytest-subtesthack project, public domain license (https://github.com/untitaker/pytest-subtesthack). 
""" parent_test = request.node def inner(func): if hasattr(Function, "from_parent"): item = Function.from_parent( parent_test, name=request.function.__name__ + "[]", originalname=request.function.__name__, callobj=func, ) else: item = Function( name=request.function.__name__ + "[]", parent=parent_test, callobj=func ) nextitem = parent_test # prevents pytest from tearing down module fixtures item.ihook.pytest_runtest_setup(item=item) try: item.ihook.pytest_runtest_call(item=item) finally: item.ihook.pytest_runtest_teardown(item=item, nextitem=nextitem) return inner @pytest.fixture def swh_scheduler(swh_scheduler): config = get_config() scheduler = config["scheduler"] config["scheduler"] = swh_scheduler # create load-git and load-hg task types for task_type in TASK_TYPES.values(): # see https://forge.softwareheritage.org/rDSCHc46ffadf7adf24c7eb3ffce062e8ade3818c79cc # noqa task_type["type"] = task_type["type"].replace("load-test-", "load-", 1) swh_scheduler.create_task_type(task_type) # create load-svn task type swh_scheduler.create_task_type( { "type": "load-svn", "description": "Update a Subversion repository", "backend_name": "swh.loader.svn.tasks.DumpMountAndLoadSvnRepository", "default_interval": timedelta(days=64), "min_interval": timedelta(hours=12), "max_interval": timedelta(days=64), "backoff_factor": 2, "max_queue_length": None, "num_retries": 7, "retry_delay": timedelta(hours=2), } ) # create load-cvs task type swh_scheduler.create_task_type( { "type": "load-cvs", "description": "Update a CVS repository", "backend_name": "swh.loader.cvs.tasks.DumpMountAndLoadSvnRepository", "default_interval": timedelta(days=64), "min_interval": timedelta(hours=12), "max_interval": timedelta(days=64), "backoff_factor": 2, "max_queue_length": None, "num_retries": 7, "retry_delay": timedelta(hours=2), } ) # create load-bzr task type swh_scheduler.create_task_type( { "type": "load-bzr", "description": "Update a Bazaar repository", "backend_name": 
"swh.loader.bzr.tasks.LoadBazaar", "default_interval": timedelta(days=64), "min_interval": timedelta(hours=12), "max_interval": timedelta(days=64), "backoff_factor": 2, "max_queue_length": None, "num_retries": 7, "retry_delay": timedelta(hours=2), } ) # add method to add load-archive-files task type during tests def add_load_archive_task_type(): swh_scheduler.create_task_type( { "type": "load-archive-files", "description": "Load tarballs", "backend_name": "swh.loader.package.archive.tasks.LoadArchive", "default_interval": timedelta(days=64), "min_interval": timedelta(hours=12), "max_interval": timedelta(days=64), "backoff_factor": 2, "max_queue_length": None, "num_retries": 7, "retry_delay": timedelta(hours=2), } ) swh_scheduler.add_load_archive_task_type = add_load_archive_task_type yield swh_scheduler config["scheduler"] = scheduler get_scheduler_load_task_types.cache_clear() @pytest.fixture(scope="session") def django_db_setup(request, django_db_blocker, postgresql_proc): from django.conf import settings settings.DATABASES["default"].update( { ("ENGINE", "django.db.backends.postgresql"), ("NAME", get_config()["test_db"]["name"]), ("USER", postgresql_proc.user), ("HOST", postgresql_proc.host), ("PORT", postgresql_proc.port), } ) with django_db_blocker.unblock(): setup_databases( verbosity=request.config.option.verbose, interactive=False, keepdb=False ) @pytest.fixture def staff_user(): return User.objects.create_user(username="admin", password="", is_staff=True) @pytest.fixture def regular_user(): return User.objects.create_user(username="johndoe", password="") @pytest.fixture def regular_user2(): return User.objects.create_user(username="janedoe", password="") @pytest.fixture def add_forge_moderator(): moderator = User.objects.create_user(username="add-forge moderator", password="") moderator.user_permissions.add( create_django_permission(ADD_FORGE_MODERATOR_PERMISSION) ) return moderator @pytest.fixture def mailmap_admin(): mailmap_admin = 
User.objects.create_user(username="mailmap-admin", password="") mailmap_admin.user_permissions.add( create_django_permission(MAILMAP_ADMIN_PERMISSION) ) return mailmap_admin @pytest.fixture def mailmap_user(): mailmap_user = User.objects.create_user(username="mailmap-user", password="") mailmap_user.user_permissions.add(create_django_permission(MAILMAP_PERMISSION)) return mailmap_user def reload_urlconf(): from django.conf import settings clear_url_caches() urlconf = settings.ROOT_URLCONF if urlconf in sys.modules: reload(sys.modules[urlconf]) else: import_module(urlconf) class SwhSettingsWrapper(SettingsWrapper): def __setattr__(self, attr: str, value) -> None: super().__setattr__(attr, value) reload_urlconf() def finalize(self) -> None: super().finalize() reload_urlconf() @pytest.fixture def django_settings(): """Override pytest-django settings fixture in order to reload URLs when modifying settings in test and after test execution as most of them depend on installed django apps in swh-web. 
""" settings = SwhSettingsWrapper() yield settings settings.finalize() diff --git a/swh/web/tests/misc/test_metrics.py b/swh/web/tests/misc/test_metrics.py index 0d62b7b8..972df663 100644 --- a/swh/web/tests/misc/test_metrics.py +++ b/swh/web/tests/misc/test_metrics.py @@ -1,137 +1,137 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import timedelta from itertools import product import random from prometheus_client.exposition import CONTENT_TYPE_LATEST import pytest -from swh.web.common.models import ( +from swh.web.common.utils import reverse +from swh.web.save_code_now.models import ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, SAVE_TASK_FAILED, SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_RUNNING, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, SaveOriginRequest, ) -from swh.web.common.origin_save import ( +from swh.web.save_code_now.origin_save import ( ACCEPTED_SAVE_REQUESTS_DELAY_METRIC, ACCEPTED_SAVE_REQUESTS_METRIC, SUBMITTED_SAVE_REQUESTS_METRIC, get_savable_visit_types, ) -from swh.web.common.utils import reverse from swh.web.tests.django_asserts import assert_contains from swh.web.tests.utils import check_http_get_response @pytest.mark.django_db def test_origin_save_metrics(client, swh_scheduler): visit_types = get_savable_visit_types() request_statuses = ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, ) load_task_statuses = ( SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, ) for _ in range(random.randint(50, 100)): visit_type = random.choice(visit_types) request_satus = random.choice(request_statuses) load_task_status = random.choice(load_task_statuses) sor = SaveOriginRequest.objects.create( 
origin_url="origin", visit_type=visit_type, status=request_satus, loading_task_status=load_task_status, ) if load_task_status in (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED): delay = random.choice(range(60)) sor.visit_date = sor.request_date + timedelta(seconds=delay) sor.save() # Note that this injects dates in the future for the sake of the test only url = reverse("metrics-prometheus") resp = check_http_get_response( client, url, status_code=200, content_type=CONTENT_TYPE_LATEST ) accepted_requests = SaveOriginRequest.objects.filter(status=SAVE_REQUEST_ACCEPTED) labels_set = product(visit_types, load_task_statuses) for labels in labels_set: sor_count = accepted_requests.filter( visit_type=labels[0], loading_task_status=labels[1] ).count() metric_text = ( f"{ACCEPTED_SAVE_REQUESTS_METRIC}{{" f'load_task_status="{labels[1]}",' f'visit_type="{labels[0]}"}} {float(sor_count)}\n' ) assert_contains(resp, metric_text) labels_set = product(visit_types, request_statuses) for labels in labels_set: sor_count = SaveOriginRequest.objects.filter( visit_type=labels[0], status=labels[1] ).count() metric_text = ( f"{SUBMITTED_SAVE_REQUESTS_METRIC}{{" f'status="{labels[1]}",' f'visit_type="{labels[0]}"}} {float(sor_count)}\n' ) assert_contains(resp, metric_text) # delay metrics save_requests = SaveOriginRequest.objects.all() labels_set = product( visit_types, ( SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED, ), ) for labels in labels_set: sors = save_requests.filter( visit_type=labels[0], loading_task_status=labels[1], visit_date__isnull=False, ) delay = 0 for sor in sors: delay += sor.visit_date.timestamp() - sor.request_date.timestamp() metric_delay_text = ( f"{ACCEPTED_SAVE_REQUESTS_DELAY_METRIC}{{" f'load_task_status="{labels[1]}",' f'visit_type="{labels[0]}"}} {float(delay)}\n' ) assert_contains(resp, metric_delay_text) diff --git a/swh/web/common/management/__init__.py b/swh/web/tests/save_code_now/__init__.py similarity index 100% copy from swh/web/common/management/__init__.py copy to 
swh/web/tests/save_code_now/__init__.py diff --git a/swh/web/tests/save_code_now/test_app.py b/swh/web/tests/save_code_now/test_app.py new file mode 100644 index 00000000..4654771c --- /dev/null +++ b/swh/web/tests/save_code_now/test_app.py @@ -0,0 +1,41 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + +from django.urls import get_resolver + +from swh.web.common.utils import reverse +from swh.web.save_code_now.urls import urlpatterns +from swh.web.tests.django_asserts import assert_not_contains +from swh.web.tests.utils import check_html_get_response + + +@pytest.mark.django_db +def test_save_code_now_deactivate(client, staff_user, origin, django_settings): + """Check Save code now feature is deactivated when the swh.web.save_code_now django + application is not in installed apps.""" + + django_settings.SWH_DJANGO_APPS = [ + app for app in django_settings.SWH_DJANGO_APPS if app != "swh.web.save_code_now" + ] + + url = reverse("swh-web-homepage") + client.force_login(staff_user) + resp = check_html_get_response(client, url, status_code=200) + assert_not_contains(resp, "swh-origin-save-item") + assert_not_contains(resp, "swh-origin-save-admin-item") + + url = reverse( + "browse-origin-directory", + query_params={"origin_url": origin["url"]}, + ) + + resp = check_html_get_response(client, url, status_code=200) + assert_not_contains(resp, "swh-take-new-snashot") + + save_code_now_view_names = set(urlpattern.name for urlpattern in urlpatterns) + all_view_names = set(get_resolver().reverse_dict.keys()) + assert save_code_now_view_names & all_view_names == set() diff --git a/swh/web/tests/common/test_django_command.py b/swh/web/tests/save_code_now/test_django_command.py similarity index 98% rename from swh/web/tests/common/test_django_command.py 
rename to swh/web/tests/save_code_now/test_django_command.py index aa9d1e18..9558e2a5 100644 --- a/swh/web/tests/common/test_django_command.py +++ b/swh/web/tests/save_code_now/test_django_command.py @@ -1,174 +1,174 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta, timezone from io import StringIO import pytest from django.core.management import call_command from swh.core.api.classes import stream_results -from swh.web.common.models import ( +from swh.web.common.typing import SaveOriginRequestInfo +from swh.web.config import get_config +from swh.web.save_code_now.models import ( SAVE_REQUEST_ACCEPTED, SAVE_TASK_FAILED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, VISIT_STATUS_FAILED, VISIT_STATUS_FULL, VISIT_STATUS_PARTIAL, ) -from swh.web.common.typing import SaveOriginRequestInfo -from swh.web.config import get_config -MODULE_FQDN = "swh.web.common.management.commands" +MODULE_FQDN = "swh.web.save_code_now.management.commands" COMMAND_NAME = "refresh_savecodenow_statuses" AUTHORIZED_ORIGIN_URL = "https://scm.ourproject.org/anonscm/%s" @pytest.fixture def mock_refresh(mocker): return mocker.patch( f"{MODULE_FQDN}.{COMMAND_NAME}.refresh_save_origin_request_statuses" ) @pytest.fixture def mock_scheduler(mocker, swh_scheduler): mock_scheduler = mocker.patch(f"{MODULE_FQDN}.{COMMAND_NAME}.get_scheduler") mock_scheduler.return_value = swh_scheduler return mock_scheduler @pytest.mark.parametrize("nb_results", [0, 10, 20]) def test_command_refresh__with_statuses_refreshed( mock_scheduler, mock_refresh, nb_results ): """Refresh status command reports non-terminal statuses updates.""" # fake returned refreshed status for 'archives' visit type mock_refresh.return_value = [ { "visit_type": "archives", } ] * nb_results out = StringIO() 
call_command(COMMAND_NAME, stdout=out) actual_output = out.getvalue() if nb_results > 0: assert f"updated {nb_results}" in actual_output else: assert "Nothing" in actual_output assert mock_scheduler.called assert mock_refresh.called @pytest.fixture def fake_refreshed_data(): """Prepare test data within the scheduler and the swh-web model db""" duplicated_origin_url = AUTHORIZED_ORIGIN_URL % "specific-origin" entries = [ { "visit_type": "archives", # ignored from recurring task scheduling "visit_status": VISIT_STATUS_FULL, "task_status": SAVE_TASK_SUCCEEDED, }, { "visit_type": "hg", # scheduled as recurring task "visit_status": VISIT_STATUS_PARTIAL, "task_status": SAVE_TASK_SUCCEEDED, }, { "visit_type": "svn", # scheduled as recurring task "visit_status": VISIT_STATUS_PARTIAL, "task_status": SAVE_TASK_SCHEDULED, }, { "visit_type": "svn", # ignored from recurring task scheduling "visit_status": VISIT_STATUS_FAILED, "task_status": SAVE_TASK_FAILED, }, { "visit_type": "hg", # ignored from recurring task scheduling "visit_status": "created", "task_status": SAVE_TASK_SCHEDULED, }, ] + [ { "visit_type": "git", "visit_status": VISIT_STATUS_FULL, "task_status": SAVE_TASK_SUCCEEDED, "origin": duplicated_origin_url, } ] * 3 # only 1 of the origin duplicates will be scheduled as recurring task time_now = datetime.now(tz=timezone.utc) - timedelta(days=len(entries)) return [ SaveOriginRequestInfo( visit_type=meta["visit_type"], visit_status=meta["visit_status"], origin_url=( meta["origin"] if "origin" in meta else AUTHORIZED_ORIGIN_URL % i ), save_request_date=time_now + timedelta(days=i - 1), save_request_status=SAVE_REQUEST_ACCEPTED, visit_date=time_now + timedelta(days=i), save_task_status=meta["task_status"], id=i, loading_task_id=i, note=None, ) for i, meta in enumerate(entries) ] def test_command_refresh__with_recurrent_tasks_scheduling( mock_scheduler, mock_refresh, fake_refreshed_data, swh_scheduler ): """Refresh status command report updates of statuses. 
The successful ones without the type 'archived' are also scheduled recurringly. """ mock_refresh.return_value = fake_refreshed_data # only visit types (git, hg, svn) types with status (full, partial) are taken into # account for scheduling, so only 3 of those matches in the fake data set. expected_nb_scheduled = 0 origins = set() expected_nb_scheduled = 0 for entry in fake_refreshed_data: visit_type = entry["visit_type"] if visit_type == "archives": # only deal with git, svn, hg continue if entry["visit_status"] not in ("partial", "full"): continue origin = entry["origin_url"] if (visit_type, origin) in origins: continue origins.add((visit_type, origin)) expected_nb_scheduled += 1 assert expected_nb_scheduled == 3 out = StringIO() call_command(COMMAND_NAME, stdout=out) actual_output = out.getvalue() assert f"Successfully updated {len(fake_refreshed_data)}" in actual_output lister = swh_scheduler.get_or_create_lister( name="save-code-now", instance_name=get_config()["instance_name"] ) result = list(stream_results(swh_scheduler.get_listed_origins, lister.id)) assert len(result) == expected_nb_scheduled assert mock_scheduler.called assert mock_refresh.called diff --git a/swh/web/tests/test_migrations.py b/swh/web/tests/save_code_now/test_migrations.py similarity index 98% rename from swh/web/tests/test_migrations.py rename to swh/web/tests/save_code_now/test_migrations.py index 88edf1d3..37edeafd 100644 --- a/swh/web/tests/test_migrations.py +++ b/swh/web/tests/save_code_now/test_migrations.py @@ -1,60 +1,60 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -APP_NAME = "swh_web_common" +APP_NAME = "swh_web_save_code_now" MIGRATION_0008 = "0008_save-code-now_indexes_20210106_1327" MIGRATION_0009 = "0009_saveoriginrequest_visit_status" MIGRATION_0010 = 
"0010_saveoriginrequest_user_id" MIGRATION_0011 = "0011_saveoriginrequest_user_ids" MIGRATION_0012 = "0012_saveoriginrequest_note" def test_migrations_09_add_visit_status_to_sor_model(migrator): """Ensures the migration adds the visit_status field to SaveOriginRequest table""" old_state = migrator.apply_initial_migration( (APP_NAME, MIGRATION_0008), ) old_model = old_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(old_model, "visit_status") is False new_state = migrator.apply_tested_migration((APP_NAME, MIGRATION_0009)) new_model = new_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(new_model, "visit_status") is True def test_migrations_10_add_user_id_to_sor_model(migrator): """Ensures the migration adds the user_id field to SaveOriginRequest table""" old_state = migrator.apply_initial_migration( (APP_NAME, MIGRATION_0009), ) old_model = old_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(old_model, "user_id") is False new_state = migrator.apply_tested_migration((APP_NAME, MIGRATION_0010)) new_model = new_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(new_model, "user_id") is True def test_migrations_12_add_note_to_sor_model(migrator): """Ensures the migration adds the user_id field to SaveOriginRequest table""" old_state = migrator.apply_initial_migration( (APP_NAME, MIGRATION_0011), ) old_model = old_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(old_model, "note") is False new_state = migrator.apply_tested_migration((APP_NAME, MIGRATION_0012)) new_model = new_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(new_model, "note") is True diff --git a/swh/web/tests/common/test_origin_save.py b/swh/web/tests/save_code_now/test_origin_save.py similarity index 98% rename from swh/web/tests/common/test_origin_save.py rename to swh/web/tests/save_code_now/test_origin_save.py index 507a34a7..a79b2a9b 100644 --- a/swh/web/tests/common/test_origin_save.py +++ 
b/swh/web/tests/save_code_now/test_origin_save.py @@ -1,789 +1,789 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta, timezone from functools import partial import re from typing import Optional import uuid import iso8601 import pytest import requests from swh.core.pytest_plugin import get_response_cb from swh.scheduler.utils import create_oneshot_task_dict from swh.web.common.exc import BadInputExc -from swh.web.common.models import ( +from swh.web.common.typing import ( + OriginExistenceCheckInfo, + OriginVisitInfo, + SaveOriginRequestInfo, +) +from swh.web.config import get_config +from swh.web.save_code_now.models import ( SAVE_REQUEST_ACCEPTED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, VISIT_STATUS_CREATED, VISIT_STATUS_FULL, VISIT_STATUS_ONGOING, VISIT_STATUS_PARTIAL, SaveOriginRequest, ) -from swh.web.common.origin_save import ( +from swh.web.save_code_now.origin_save import ( _check_origin_exists, _check_visit_type_savable, _visit_type_task, _visit_type_task_privileged, get_savable_visit_types, get_save_origin_requests, get_save_origin_task_info, origin_exists, refresh_save_origin_request_statuses, ) -from swh.web.common.typing import ( - OriginExistenceCheckInfo, - OriginVisitInfo, - SaveOriginRequestInfo, -) -from swh.web.config import get_config _es_url = "http://esnode1.internal.softwareheritage.org:9200" _es_workers_index_url = "%s/swh_workers-*" % _es_url _origin_url = "https://gitlab.com/inkscape/inkscape" _visit_type = "git" _task_id = 1 @pytest.fixture(autouse=True) def requests_mock_datadir(datadir, requests_mock_datadir): """Override default behavior to deal with post method""" cb = partial(get_response_cb, datadir=datadir) 
requests_mock_datadir.post(re.compile("https?://"), body=cb) return requests_mock_datadir @pytest.mark.django_db def test_get_save_origin_archived_task_info(swh_scheduler): _get_save_origin_task_info_test(swh_scheduler, task_archived=True) @pytest.mark.django_db def test_get_save_origin_task_info_without_es(swh_scheduler): _get_save_origin_task_info_test(swh_scheduler, es_available=False) def _fill_scheduler_db( swh_scheduler, task_status="completed", task_run_status="eventful", task_archived=False, visit_started_date=None, ): task = task_run = None if not task_archived: task = swh_scheduler.create_tasks( [create_oneshot_task_dict("load-git", repo_url=_origin_url)] )[0] backend_id = str(uuid.uuid4()) if task_status != "next_run_not_scheduled": swh_scheduler.schedule_task_run(task["id"], backend_id) if task_run_status is not None: swh_scheduler.start_task_run(backend_id) task_run = dict( swh_scheduler.end_task_run(backend_id, task_run_status).items() ) return task, task_run @pytest.mark.parametrize( "wrong_type,privileged_user", [ ("dummy", True), ("dumb", False), ("archives", False), # when no privilege, this is rejected ], ) def test_check_visit_type_savable(wrong_type, privileged_user, swh_scheduler): swh_scheduler.add_load_archive_task_type() with pytest.raises(BadInputExc, match="Allowed types"): _check_visit_type_savable(wrong_type, privileged_user) # when privileged_user, the following is accepted though _check_visit_type_savable("archives", True) def test_get_savable_visit_types(swh_scheduler): swh_scheduler.add_load_archive_task_type() default_list = list(_visit_type_task.keys()) assert set(get_savable_visit_types()) == set(default_list) privileged_list = default_list.copy() privileged_list += list(_visit_type_task_privileged.keys()) assert set(get_savable_visit_types(privileged_user=True)) == set(privileged_list) def _get_save_origin_task_info_test( swh_scheduler, task_archived=False, es_available=True, full_info=True ): swh_web_config = get_config() if 
es_available: swh_web_config.update({"es_workers_index_url": _es_workers_index_url}) else: swh_web_config.update({"es_workers_index_url": ""}) sor = SaveOriginRequest.objects.create( request_date=datetime.now(tz=timezone.utc), visit_type=_visit_type, origin_url="https://gitlab.com/inkscape/inkscape", status=SAVE_REQUEST_ACCEPTED, visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1), loading_task_id=_task_id, ) task, task_run = _fill_scheduler_db(swh_scheduler, task_archived=task_archived) es_response = requests.post("%s/_search" % _es_workers_index_url).json() task_exec_data = es_response["hits"]["hits"][-1]["_source"] sor_task_info = get_save_origin_task_info(sor.id, full_info=full_info) expected_result = ( { "type": task["type"], "arguments": task["arguments"], "id": task["id"], "backend_id": task_run["backend_id"], "scheduled": task_run["scheduled"], "started": task_run["started"], "ended": task_run["ended"], "status": task_run["status"], "visit_status": sor.visit_status, } if not task_archived else {} ) if es_available and not task_archived: expected_result.update( { "message": task_exec_data["message"], "name": task_exec_data["swh_task_name"], "worker": task_exec_data["hostname"], } ) if not full_info: expected_result.pop("id", None) expected_result.pop("backend_id", None) expected_result.pop("worker", None) if "message" in expected_result: message = "" message_lines = expected_result["message"].split("\n") for line in message_lines: if line.startswith("Traceback"): break message += f"{line}\n" message += message_lines[-1] expected_result["message"] = message assert sor_task_info == expected_result @pytest.mark.django_db def test_get_save_origin_requests_find_visit_date(mocker, swh_scheduler): # create a save request SaveOriginRequest.objects.create( request_date=datetime.now(tz=timezone.utc), visit_type=_visit_type, origin_url=_origin_url, status=SAVE_REQUEST_ACCEPTED, visit_date=None, loading_task_id=_task_id, ) # mock scheduler and archive 
_fill_scheduler_db(swh_scheduler) - mock_archive = mocker.patch("swh.web.common.origin_save.archive") + mock_archive = mocker.patch("swh.web.save_code_now.origin_save.archive") mock_archive.lookup_origin.return_value = {"url": _origin_url} # create a visit for the save request visit_date = datetime.now(tz=timezone.utc).isoformat() visit_info = OriginVisitInfo( date=visit_date, formatted_date="", metadata={}, origin=_origin_url, snapshot="", status=VISIT_STATUS_FULL, type=_visit_type, url="", visit=34, ) mock_archive.origin_visit_find_by_date.return_value = visit_info # check visit date has been correctly found sors = get_save_origin_requests(_visit_type, _origin_url) assert len(sors) == 1 assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED assert sors[0]["visit_date"] == visit_date mock_archive.origin_visit_find_by_date.assert_called_once() # check visit is not searched again when it has been found get_save_origin_requests(_visit_type, _origin_url) mock_archive.origin_visit_find_by_date.assert_called_once() # check visit date are not searched for save requests older than # one month sor = SaveOriginRequest.objects.create( visit_type=_visit_type, origin_url=_origin_url, status=SAVE_REQUEST_ACCEPTED, loading_task_id=_task_id, visit_date=None, ) sor.request_date = datetime.now(tz=timezone.utc) - timedelta(days=31) sor.save() _fill_scheduler_db(swh_scheduler, task_status="disabled", task_run_status="failed") sors = get_save_origin_requests(_visit_type, _origin_url) assert len(sors) == 2 assert sors[0]["save_task_status"] == SAVE_TASK_FAILED assert sors[0]["visit_date"] is None mock_archive.origin_visit_find_by_date.assert_called_once() def _get_save_origin_requests( mocker, swh_scheduler, load_status, visit_status, request_date: Optional[datetime] = None, ): """Wrapper around the get_origin_save_origin_request call.""" SaveOriginRequest.objects.create( request_date=datetime.now(tz=timezone.utc), visit_type=_visit_type, visit_status=visit_status, 
origin_url=_origin_url, status=SAVE_REQUEST_ACCEPTED, visit_date=None, loading_task_id=_task_id, ) # mock scheduler and archives _fill_scheduler_db( swh_scheduler, task_status="next_run_scheduled", task_run_status=load_status ) - mock_archive = mocker.patch("swh.web.common.origin_save.archive") + mock_archive = mocker.patch("swh.web.save_code_now.origin_save.archive") mock_archive.lookup_origin.return_value = {"url": _origin_url} # create a visit for the save request with status created visit_date = datetime.now(tz=timezone.utc).isoformat() visit_info = OriginVisitInfo( date=visit_date, formatted_date="", metadata={}, origin=_origin_url, snapshot="", # make mypy happy status=visit_status, type=_visit_type, url="", visit=34, ) mock_archive.origin_visit_find_by_date.return_value = visit_info sors = get_save_origin_requests(_visit_type, _origin_url) mock_archive.origin_visit_find_by_date.assert_called_once() return sors @pytest.mark.parametrize("visit_date", [None, "some-date"]) def test_from_save_origin_request_to_save_request_info_dict(visit_date): """Ensure save request to json serializable dict is fine""" request_date = datetime.now(tz=timezone.utc) _visit_date = request_date + timedelta(minutes=5) if visit_date else None request_date = datetime.now(tz=timezone.utc) note = "request succeeded" sor = SaveOriginRequest( request_date=request_date, visit_type=_visit_type, visit_status=VISIT_STATUS_FULL, origin_url=_origin_url, status=SAVE_REQUEST_ACCEPTED, loading_task_status=None, visit_date=_visit_date, loading_task_id=1, note=note, ) assert sor.to_dict() == SaveOriginRequestInfo( id=sor.id, origin_url=sor.origin_url, visit_type=sor.visit_type, save_request_date=sor.request_date.isoformat(), save_request_status=sor.status, save_task_status=sor.loading_task_status, visit_status=sor.visit_status, visit_date=_visit_date.isoformat() if _visit_date else None, loading_task_id=sor.loading_task_id, note=note, ) def test__check_origin_exists_404(requests_mock): url_ko = 
"https://example.org/some-inexistant-url" requests_mock.head(url_ko, status_code=404) with pytest.raises(BadInputExc, match="not exist"): _check_origin_exists(url_ko) def test__check_origin_exists_200(requests_mock): url = "https://example.org/url" requests_mock.head(url, status_code=200) # passes the check actual_metadata = _check_origin_exists(url) # and we actually may have retrieved some metadata on the origin assert actual_metadata == origin_exists(url) def test_origin_exists_404(requests_mock): """Origin which does not exist should be reported as inexistent""" url_ko = "https://example.org/some-inexistant-url" requests_mock.head(url_ko, status_code=404) actual_result = origin_exists(url_ko) assert actual_result == OriginExistenceCheckInfo( origin_url=url_ko, exists=False, last_modified=None, content_length=None, ) def test_origin_exists_200_no_data(requests_mock): """Existing origin should be reported as such (no extra information)""" url = "http://example.org/real-url" requests_mock.head( url, status_code=200, ) actual_result = origin_exists(url) assert actual_result == OriginExistenceCheckInfo( origin_url=url, exists=True, last_modified=None, content_length=None, ) def test_origin_exists_200_with_data(requests_mock): """Existing origin should be reported as such (+ extra information)""" url = "http://example.org/real-url" requests_mock.head( url, status_code=200, headers={ "content-length": "10", "last-modified": "Sun, 21 Aug 2011 16:26:32 GMT", }, ) actual_result = origin_exists(url) assert actual_result == OriginExistenceCheckInfo( origin_url=url, exists=True, content_length=10, last_modified="2011-08-21T16:26:32", ) def test_origin_exists_internet_archive(requests_mock): """Edge case where an artifact URL to check existence is hosted on the Internet Archive""" url = ( "https://web.archive.org/web/20100705043309/" "http://www.cs.unm.edu/~mccune/old-ftp/eqp-09e.tar.gz" ) redirect_url = ( "https://web.archive.org/web/20100610004108/" 
"http://www.cs.unm.edu/~mccune/old-ftp/eqp-09e.tar.gz" ) requests_mock.head( url, status_code=302, headers={ "Location": redirect_url, }, ) requests_mock.head( redirect_url, status_code=200, headers={ "X-Archive-Orig-Last-Modified": "Tue, 12 May 2009 22:09:43 GMT", "X-Archive-Orig-Content-Length": "121421", }, ) actual_result = origin_exists(url) assert actual_result == OriginExistenceCheckInfo( origin_url=url, exists=True, content_length=121421, last_modified="2009-05-12T22:09:43", ) def test_origin_exists_200_with_data_unexpected_date_format(requests_mock): """Existing origin should be ok, unexpected last modif time result in no time""" url = "http://example.org/real-url2" # this is parsable but not as expected unexpected_format_date = "Sun, 21 Aug 2021 16:26:32" requests_mock.head( url, status_code=200, headers={ "last-modified": unexpected_format_date, }, ) actual_result = origin_exists(url) # so the resulting date is None assert actual_result == OriginExistenceCheckInfo( origin_url=url, exists=True, content_length=None, last_modified=None, ) @pytest.mark.django_db @pytest.mark.parametrize( "visit_status", [ VISIT_STATUS_CREATED, VISIT_STATUS_ONGOING, ], ) def test_get_save_origin_requests_no_visit_date_found( mocker, swh_scheduler, visit_status ): """Uneventful visits with failed visit status are marked as failed""" sors = _get_save_origin_requests( mocker, swh_scheduler, load_status="scheduled", visit_status=visit_status, ) # check no visit date has been found assert len(sors) == 1 assert sors[0]["save_task_status"] == SAVE_TASK_RUNNING assert sors[0]["visit_date"] is not None assert sors[0]["visit_status"] == visit_status @pytest.mark.django_db @pytest.mark.parametrize( "visit_status", [ "not_found", "failed", ], ) def test_get_save_origin_requests_no_failed_status_override( mocker, swh_scheduler, visit_status ): """Uneventful visits with failed statuses (failed, not found) are marked as failed""" sors = _get_save_origin_requests( mocker, swh_scheduler, 
load_status="uneventful", visit_status=visit_status ) assert len(sors) == 1 assert sors[0]["save_task_status"] == SAVE_TASK_FAILED visit_date = sors[0]["visit_date"] assert visit_date is not None sors = get_save_origin_requests(_visit_type, _origin_url) assert len(sors) == 1 assert sors[0]["save_task_status"] == SAVE_TASK_FAILED assert sors[0]["visit_status"] == visit_status @pytest.mark.django_db @pytest.mark.parametrize( "load_status,visit_status", [ ("eventful", VISIT_STATUS_FULL), ("eventful", VISIT_STATUS_PARTIAL), ("uneventful", VISIT_STATUS_PARTIAL), ], ) def test_get_visit_info_for_save_request_succeeded( mocker, swh_scheduler, load_status, visit_status ): """Nominal scenario, below 30 days, returns something""" sors = _get_save_origin_requests( mocker, swh_scheduler, load_status=load_status, visit_status=visit_status ) assert len(sors) == 1 assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED assert sors[0]["visit_date"] is not None assert sors[0]["visit_status"] == visit_status sors = get_save_origin_requests(_visit_type, _origin_url) assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED assert sors[0]["visit_status"] == visit_status @pytest.mark.django_db @pytest.mark.parametrize( "load_status", [ "eventful", "uneventful", ], ) def test_get_visit_info_incomplete_visit_still_successful( mocker, swh_scheduler, load_status ): """Incomplete visit information, yet the task is updated partially""" sors = _get_save_origin_requests( mocker, swh_scheduler, load_status=load_status, visit_status=None, ) assert len(sors) == 1 assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED # As the entry is missing the following information though assert sors[0]["visit_date"] is not None assert sors[0]["visit_status"] is None # It's still detected as to be updated by the refresh routine sors = refresh_save_origin_request_statuses() assert len(sors) == 1 assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED assert sors[0]["visit_date"] is not None assert 
sors[0]["visit_status"] is None @pytest.mark.django_db def test_refresh_in_progress_save_request_statuses( mocker, swh_scheduler, api_client, archive_data ): """Refresh a pending save origins requests and update if the status changes""" date_now = datetime.now(tz=timezone.utc) date_pivot = date_now - timedelta(days=30) visit_started_date = date_now - timedelta(minutes=1) # returned visit status SaveOriginRequest.objects.create( request_date=datetime.now(tz=timezone.utc), visit_type=_visit_type, visit_status=VISIT_STATUS_CREATED, origin_url=_origin_url, status=SAVE_REQUEST_ACCEPTED, visit_date=None, loading_task_id=_task_id, ) # mock scheduler and archives _fill_scheduler_db( swh_scheduler, task_status="next_run_scheduled", task_run_status=SAVE_TASK_SCHEDULED, ) - mock_archive = mocker.patch("swh.web.common.origin_save.archive") + mock_archive = mocker.patch("swh.web.save_code_now.origin_save.archive") mock_archive.lookup_origin.return_value = {"url": _origin_url} # create a visit for the save request with status created visit_date = datetime.now(tz=timezone.utc).isoformat() visit_info = OriginVisitInfo( date=visit_date, formatted_date="", metadata={}, origin=_origin_url, snapshot="", # make mypy happy status=VISIT_STATUS_CREATED, type=_visit_type, url="", visit=34, ) mock_archive.origin_visit_find_by_date.return_value = visit_info # make the scheduler return a running event _fill_scheduler_db( swh_scheduler, task_status="next_run_scheduled", task_run_status="started", visit_started_date=visit_started_date, ) # The visit is detected but still running sors = refresh_save_origin_request_statuses() assert ( mock_archive.origin_visit_find_by_date.called and mock_archive.origin_visit_find_by_date.call_count == 1 ) assert len(sors) == 1 for sor in sors: assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot # The status is updated assert sor["save_task_status"] == SAVE_TASK_RUNNING # but the following entries are missing so it's not updated assert 
sor["visit_date"] is not None assert sor["visit_status"] == VISIT_STATUS_CREATED # make the visit status completed # make the scheduler return a running event _fill_scheduler_db( swh_scheduler, task_status="completed", task_run_status="eventful", visit_started_date=visit_started_date, ) # This time around, the origin returned will have all required information updated # (visit date and visit status in final state) visit_date = datetime.now(tz=timezone.utc).isoformat() visit_info.update({"date": visit_date, "status": VISIT_STATUS_FULL}) mock_archive.origin_visit_find_by_date.return_value = visit_info # Detected entry, this time it should be updated sors = refresh_save_origin_request_statuses() assert len(sors) == 1 assert ( mock_archive.origin_visit_find_by_date.called and mock_archive.origin_visit_find_by_date.call_count == 1 + 1 ) for sor in sors: assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot # as it turns out, in this test, this won't update anything as no new status got # returned by the scheduler assert sor["save_task_status"] == SAVE_TASK_SUCCEEDED assert sor["visit_date"] == visit_date assert sor["visit_status"] == VISIT_STATUS_FULL # Once in final state, a sor should not be updated anymore sors = refresh_save_origin_request_statuses() assert len(sors) == 0 @pytest.mark.django_db def test_refresh_save_request_statuses(mocker, swh_scheduler, api_client, archive_data): """Refresh filters save origins requests and update if changes""" date_now = datetime.now(tz=timezone.utc) date_pivot = date_now - timedelta(days=30) # returned visit status SaveOriginRequest.objects.create( request_date=datetime.now(tz=timezone.utc), visit_type=_visit_type, visit_status=None, origin_url=_origin_url, status=SAVE_REQUEST_ACCEPTED, visit_date=None, loading_task_id=_task_id, ) # mock scheduler and archives _fill_scheduler_db( swh_scheduler, task_status="next_run_scheduled", task_run_status=SAVE_TASK_SCHEDULED, ) - mock_archive = 
mocker.patch("swh.web.common.origin_save.archive") + mock_archive = mocker.patch("swh.web.save_code_now.origin_save.archive") mock_archive.lookup_origin.return_value = {"url": _origin_url} # create a visit for the save request with status created visit_date = datetime.now(tz=timezone.utc).isoformat() visit_info = OriginVisitInfo( date=visit_date, formatted_date="", metadata={}, origin=_origin_url, snapshot="", # make mypy happy status=VISIT_STATUS_CREATED, type=_visit_type, url="", visit=34, ) mock_archive.origin_visit_find_by_date.return_value = visit_info # no changes so refresh does detect the entry but does nothing sors = refresh_save_origin_request_statuses() assert len(sors) == 1 for sor in sors: assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot # as it turns out, in this test, this won't update anything as no new status got # returned by the scheduler assert sor["save_task_status"] == SAVE_TASK_RUNNING # Information is empty assert sor["visit_date"] == visit_date assert sor["visit_status"] == VISIT_STATUS_CREATED # A save code now entry is detected for update, but as nothing changes, the entry # remains in the same state sors = refresh_save_origin_request_statuses() assert len(sors) == 1 for sor in sors: assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot # Status is not updated as no new information is available on the visit status # and the task status has not moved assert sor["save_task_status"] == SAVE_TASK_RUNNING # Information is empty assert sor["visit_date"] == visit_date assert sor["visit_status"] == VISIT_STATUS_CREATED # This time around, the origin returned will have all information updated # create a visit for the save request with status created visit_date = datetime.now(tz=timezone.utc).isoformat() visit_info = OriginVisitInfo( date=visit_date, formatted_date="", metadata={}, origin=_origin_url, snapshot="", # make mypy happy status=VISIT_STATUS_FULL, type=_visit_type, url="", visit=34, ) 
mock_archive.origin_visit_find_by_date.return_value = visit_info # Detected entry, this time it should be updated sors = refresh_save_origin_request_statuses() assert len(sors) == 1 for sor in sors: assert iso8601.parse_date(sor["save_request_date"]) >= date_pivot # as it turns out, in this test, this won't update anything as no new status got # returned by the scheduler assert sor["save_task_status"] == SAVE_TASK_SUCCEEDED assert sor["visit_date"] == visit_date assert sor["visit_status"] == VISIT_STATUS_FULL # This time, nothing left to update sors = refresh_save_origin_request_statuses() assert len(sors) == 0 diff --git a/swh/web/tests/admin/test_origin_save.py b/swh/web/tests/save_code_now/test_origin_save_admin.py similarity index 98% rename from swh/web/tests/admin/test_origin_save.py rename to swh/web/tests/save_code_now/test_origin_save_admin.py index 9b2aa25a..a140784a 100644 --- a/swh/web/tests/admin/test_origin_save.py +++ b/swh/web/tests/save_code_now/test_origin_save_admin.py @@ -1,227 +1,227 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from urllib.parse import unquote import pytest -from swh.web.common.models import ( +from swh.web.common.utils import reverse +from swh.web.save_code_now.models import ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, SAVE_TASK_NOT_YET_SCHEDULED, SaveAuthorizedOrigin, SaveOriginRequest, SaveUnauthorizedOrigin, ) -from swh.web.common.origin_save import can_save_origin -from swh.web.common.utils import reverse +from swh.web.save_code_now.origin_save import can_save_origin from swh.web.tests.utils import check_http_get_response, check_http_post_response _authorized_origin_url = "https://scm.ourproject.org/anonscm/" _unauthorized_origin_url = "https://www.softwareheritage.org/" pytestmark = 
pytest.mark.django_db @pytest.fixture(autouse=True) def populated_db(): SaveAuthorizedOrigin.objects.create(url=_authorized_origin_url) SaveUnauthorizedOrigin.objects.create(url=_unauthorized_origin_url) def check_not_login(client, url): login_url = reverse("login", query_params={"next": url}) resp = check_http_post_response(client, url, status_code=302) assert unquote(resp.url) == login_url def test_add_authorized_origin_url(client, staff_user): authorized_url = "https://scm.adullact.net/anonscm/" assert can_save_origin(authorized_url) == SAVE_REQUEST_PENDING url = reverse( "admin-origin-save-add-authorized-url", url_args={"origin_url": authorized_url} ) check_not_login(client, url) assert can_save_origin(authorized_url) == SAVE_REQUEST_PENDING client.force_login(staff_user) check_http_post_response(client, url, status_code=200) assert can_save_origin(authorized_url) == SAVE_REQUEST_ACCEPTED def test_remove_authorized_origin_url(client, staff_user): assert can_save_origin(_authorized_origin_url) == SAVE_REQUEST_ACCEPTED url = reverse( "admin-origin-save-remove-authorized-url", url_args={"origin_url": _authorized_origin_url}, ) check_not_login(client, url) assert can_save_origin(_authorized_origin_url) == SAVE_REQUEST_ACCEPTED client.force_login(staff_user) check_http_post_response(client, url, status_code=200) assert can_save_origin(_authorized_origin_url) == SAVE_REQUEST_PENDING def test_add_unauthorized_origin_url(client, staff_user): unauthorized_url = "https://www.yahoo./" assert can_save_origin(unauthorized_url) == SAVE_REQUEST_PENDING url = reverse( "admin-origin-save-add-unauthorized-url", url_args={"origin_url": unauthorized_url}, ) check_not_login(client, url) assert can_save_origin(unauthorized_url) == SAVE_REQUEST_PENDING client.force_login(staff_user) check_http_post_response(client, url, status_code=200) assert can_save_origin(unauthorized_url) == SAVE_REQUEST_REJECTED def test_remove_unauthorized_origin_url(client, staff_user): assert 
can_save_origin(_unauthorized_origin_url) == SAVE_REQUEST_REJECTED url = reverse( "admin-origin-save-remove-unauthorized-url", url_args={"origin_url": _unauthorized_origin_url}, ) check_not_login(client, url) assert can_save_origin(_unauthorized_origin_url) == SAVE_REQUEST_REJECTED client.force_login(staff_user) check_http_post_response(client, url, status_code=200) assert can_save_origin(_unauthorized_origin_url) == SAVE_REQUEST_PENDING def test_accept_pending_save_request(client, staff_user, swh_scheduler): visit_type = "git" origin_url = "https://v2.pikacode.com/bthate/botlib.git" save_request_url = reverse( "api-1-save-origin", url_args={"visit_type": visit_type, "origin_url": origin_url}, ) response = check_http_post_response(client, save_request_url, status_code=200) assert response.data["save_request_status"] == SAVE_REQUEST_PENDING accept_request_url = reverse( "admin-origin-save-request-accept", url_args={"visit_type": visit_type, "origin_url": origin_url}, ) check_not_login(client, accept_request_url) client.force_login(staff_user) response = check_http_post_response(client, accept_request_url, status_code=200) response = check_http_get_response(client, save_request_url, status_code=200) assert response.data[0]["save_request_status"] == SAVE_REQUEST_ACCEPTED assert response.data[0]["save_task_status"] == SAVE_TASK_NOT_YET_SCHEDULED def test_reject_pending_save_request(client, staff_user, swh_scheduler): visit_type = "git" origin_url = "https://wikipedia.com" save_request_url = reverse( "api-1-save-origin", url_args={"visit_type": visit_type, "origin_url": origin_url}, ) response = check_http_post_response(client, save_request_url, status_code=200) assert response.data["save_request_status"] == SAVE_REQUEST_PENDING reject_request_url = reverse( "admin-origin-save-request-reject", url_args={"visit_type": visit_type, "origin_url": origin_url}, ) check_not_login(client, reject_request_url) client.force_login(staff_user) response = 
check_http_post_response(client, reject_request_url, status_code=200) response = check_http_get_response(client, save_request_url, status_code=200) assert response.data[0]["save_request_status"] == SAVE_REQUEST_REJECTED assert response.data[0]["note"] is None def test_reject_pending_save_request_not_found(client, staff_user, swh_scheduler): visit_type = "git" origin_url = "https://example.org" reject_request_url = reverse( "admin-origin-save-request-reject", url_args={"visit_type": visit_type, "origin_url": origin_url}, ) client.force_login(staff_user) check_http_post_response(client, reject_request_url, status_code=404) def test_reject_pending_save_request_with_note(client, staff_user, swh_scheduler): visit_type = "git" origin_url = "https://wikipedia.com" save_request_url = reverse( "api-1-save-origin", url_args={"visit_type": visit_type, "origin_url": origin_url}, ) response = check_http_post_response(client, save_request_url, status_code=200) assert response.data["save_request_status"] == SAVE_REQUEST_PENDING reject_request_url = reverse( "admin-origin-save-request-reject", url_args={"visit_type": visit_type, "origin_url": origin_url}, ) data = {"note": "The URL does not target a git repository"} client.force_login(staff_user) response = check_http_post_response( client, reject_request_url, status_code=200, data=data ) response = check_http_get_response(client, save_request_url, status_code=200) assert response.data[0]["save_request_status"] == SAVE_REQUEST_REJECTED assert response.data[0]["note"] == data["note"] def test_remove_save_request(client, staff_user): sor = SaveOriginRequest.objects.create( visit_type="git", origin_url="https://wikipedia.com", status=SAVE_REQUEST_PENDING, ) assert SaveOriginRequest.objects.count() == 1 remove_request_url = reverse( "admin-origin-save-request-remove", url_args={"sor_id": sor.id} ) check_not_login(client, remove_request_url) client.force_login(staff_user) check_http_post_response(client, remove_request_url, 
status_code=200) assert SaveOriginRequest.objects.count() == 0 diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/save_code_now/test_origin_save_api.py similarity index 98% rename from swh/web/tests/api/views/test_origin_save.py rename to swh/web/tests/save_code_now/test_origin_save_api.py index 9ae6bbe9..647b23ad 100644 --- a/swh/web/tests/api/views/test_origin_save.py +++ b/swh/web/tests/save_code_now/test_origin_save_api.py @@ -1,657 +1,657 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta import uuid import pytest from django.core.exceptions import ObjectDoesNotExist from django.utils import timezone from swh.web.api.throttling import SwhWebUserRateThrottle from swh.web.auth.utils import API_SAVE_ORIGIN_PERMISSION, SWH_AMBASSADOR_PERMISSION -from swh.web.common.models import ( +from swh.web.common.typing import OriginExistenceCheckInfo +from swh.web.common.utils import reverse +from swh.web.save_code_now.models import ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, SAVE_TASK_FAILED, SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, VISIT_STATUS_FAILED, VISIT_STATUS_FULL, SaveAuthorizedOrigin, SaveOriginRequest, SaveUnauthorizedOrigin, ) -from swh.web.common.typing import OriginExistenceCheckInfo -from swh.web.common.utils import reverse from swh.web.settings.tests import save_origin_rate_post from swh.web.tests.utils import ( check_api_get_responses, check_api_post_response, check_api_post_responses, create_django_permission, ) pytestmark = pytest.mark.django_db @pytest.fixture(autouse=True) def populated_db(): SaveAuthorizedOrigin.objects.create(url="https://github.com/"), 
SaveAuthorizedOrigin.objects.create(url="https://gitlab.com/"), SaveUnauthorizedOrigin.objects.create(url="https://github.com/user/illegal_repo") SaveUnauthorizedOrigin.objects.create(url="https://gitlab.com/user_to_exclude") def test_invalid_visit_type(api_client, swh_scheduler): url = reverse( "api-1-save-origin", url_args={ "visit_type": "foo", "origin_url": "https://github.com/torvalds/linux", }, ) check_api_get_responses(api_client, url, status_code=400) def test_invalid_origin_url(api_client, swh_scheduler): url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": "bar"} ) check_api_get_responses(api_client, url, status_code=400) def check_created_save_request_status( api_client, mocker, origin_url, expected_request_status, expected_task_status=None, visit_date=None, ): - mock_origin_exists = mocker.patch("swh.web.common.origin_save.origin_exists") + mock_origin_exists = mocker.patch("swh.web.save_code_now.origin_save.origin_exists") mock_origin_exists.return_value = OriginExistenceCheckInfo( origin_url=origin_url, exists=True, last_modified=None, content_length=None ) url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url} ) mock_visit_date = mocker.patch( - ("swh.web.common.origin_save._get_visit_info_for_save_request") + ("swh.web.save_code_now.origin_save._get_visit_info_for_save_request") ) mock_visit_date.return_value = (visit_date, None) if expected_request_status != SAVE_REQUEST_REJECTED: response = check_api_post_responses(api_client, url, data=None, status_code=200) assert response.data["save_request_status"] == expected_request_status assert response.data["save_task_status"] == expected_task_status else: check_api_post_responses(api_client, url, data=None, status_code=403) def check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status, expected_task_status, scheduler_task_status="next_run_not_scheduled", scheduler_task_run_status=None, 
visit_date=None, visit_status=None, ): if expected_task_status != SAVE_TASK_NOT_CREATED: task = dict(swh_scheduler.search_tasks()[0].items()) backend_id = str(uuid.uuid4()) if scheduler_task_status != "next_run_not_scheduled": swh_scheduler.schedule_task_run(task["id"], backend_id) if scheduler_task_run_status is not None: swh_scheduler.start_task_run(backend_id) task_run = dict( swh_scheduler.end_task_run(backend_id, scheduler_task_run_status).items() ) url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url} ) mock_visit_date = mocker.patch( - ("swh.web.common.origin_save._get_visit_info_for_save_request") + ("swh.web.save_code_now.origin_save._get_visit_info_for_save_request") ) mock_visit_date.return_value = (visit_date, visit_status) response = check_api_get_responses(api_client, url, status_code=200) save_request_data = response.data[0] assert save_request_data["save_request_status"] == expected_request_status assert save_request_data["save_task_status"] == expected_task_status assert save_request_data["visit_status"] == visit_status if scheduler_task_run_status is not None: # Check that save task status is still available when # the scheduler task has been archived swh_scheduler.delete_archived_tasks( [{"task_id": task["id"], "task_run_id": task_run["id"]}] ) response = check_api_get_responses(api_client, url, status_code=200) save_request_data = response.data[0] assert save_request_data["save_task_status"] == expected_task_status assert save_request_data["visit_status"] == visit_status def test_save_request_rejected(api_client, mocker, swh_scheduler): origin_url = "https://github.com/user/illegal_repo" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_REJECTED, ) check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status=SAVE_REQUEST_REJECTED, expected_task_status=SAVE_TASK_NOT_CREATED, ) def test_save_request_pending(api_client, 
def test_save_request_scheduled(api_client, mocker, swh_scheduler):
    """Check the statuses reported for an accepted request whose task run
    has been put in the "scheduled" state.

    On creation the request is expected to be accepted with a task that is
    not yet scheduled; once the scheduler task run is marked as scheduled,
    the reported task status is expected to be SAVE_TASK_SCHEDULED.
    """
    cmake_origin = "https://github.com/Kitware/CMake"

    creation_expectations = {
        "expected_request_status": SAVE_REQUEST_ACCEPTED,
        "expected_task_status": SAVE_TASK_NOT_YET_SCHEDULED,
    }
    check_created_save_request_status(
        api_client, mocker, cmake_origin, **creation_expectations
    )

    followup_expectations = {
        "expected_request_status": SAVE_REQUEST_ACCEPTED,
        "expected_task_status": SAVE_TASK_SCHEDULED,
        "scheduler_task_status": "next_run_scheduled",
        "scheduler_task_run_status": "scheduled",
    }
    check_save_request_status(
        api_client, mocker, swh_scheduler, cmake_origin, **followup_expectations
    )
def test_create_save_request_no_duplicate(api_client, mocker, swh_scheduler):
    """Check that submitting the same origin twice keeps a single
    SaveOriginRequest row for it.

    The second submission happens after the task run has been marked as
    scheduled, and is expected to report the SAVE_TASK_SCHEDULED status.
    """
    origin_url = "https://github.com/webpack/webpack"

    def stored_requests():
        # All git save requests currently recorded for the origin under test
        return list(
            SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url)
        )

    # first submission
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
    )
    assert len(stored_requests()) == 1

    # move the associated task run to the "scheduled" state
    check_save_request_status(
        api_client,
        mocker,
        swh_scheduler,
        origin_url,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_SCHEDULED,
        scheduler_task_status="next_run_scheduled",
        scheduler_task_run_status="scheduled",
    )

    # second submission of the same origin
    check_created_save_request_status(
        api_client,
        mocker,
        origin_url,
        expected_request_status=SAVE_REQUEST_ACCEPTED,
        expected_task_status=SAVE_TASK_SCHEDULED,
    )
    assert len(stored_requests()) == 1
def test_save_requests_rate_limit(api_client, swh_scheduler):
    """Check that POST requests beyond ``save_origin_rate_post`` get a 429.

    The first ``save_origin_rate_post`` requests are expected to succeed
    (HTTP 200) and the very next one to be throttled (HTTP 429).
    """
    save_url_args = {"visit_type": _visit_type, "origin_url": _origin_url}
    url = reverse("api-1-save-origin", url_args=save_url_args)

    # one expected status code per POST request, in sending order
    expected_statuses = [200] * save_origin_rate_post + [429]
    for expected_status in expected_statuses:
        check_api_post_response(api_client, url, status_code=expected_status)
def test_create_save_request_archives_missing_artifacts_data(
    api_client, keycloak_oidc, swh_scheduler
):
    """Check the 400 responses returned for incomplete "archives" payloads.

    Two payloads are posted by a user holding the ambassador permission:

    - an empty payload, expected to be refused with a reason mentioning
      that artifacts data are missing;
    - a payload whose single artifact has an empty URL, expected to be
      refused with a reason mentioning a missing url or version.
    """
    swh_scheduler.add_load_archive_task_type()

    keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION]
    oidc_profile = keycloak_oidc.login()
    api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}")

    origin_url = "https://somewhere.org/simple"
    url = reverse(
        "api-1-save-origin",
        url_args={
            "visit_type": "archives",
            "origin_url": origin_url,
        },
    )

    # no artifacts data at all
    response = check_api_post_response(
        api_client,
        url,
        status_code=400,
        data={},
    )
    assert "Artifacts data are missing" in response.data["reason"]

    # The artifact entry uses the same keys as a valid payload
    # ("artifact_url" / "artifact_version"); only the URL value is missing,
    # so the request is refused because of the empty URL and not because of
    # an unrecognized key.
    response = check_api_post_response(
        api_client,
        url,
        status_code=400,
        data={"archives_data": [{"artifact_url": "", "artifact_version": "1.0"}]},
    )
    assert "Missing url or version for an artifact to load" in response.data["reason"]
def test_create_pending_save_request_multiple_authenticated_users(
    api_client, swh_scheduler, regular_user, regular_user2
):
    """Check that the ids of two users submitting the same origin are both
    found in the ``user_ids`` field of a stored save request.
    """
    url = reverse(
        "api-1-save-origin",
        url_args={
            "visit_type": "git",
            "origin_url": "https://some.git.hosters/user/repo3",
        },
    )
    submitters = (regular_user, regular_user2)

    # each user logs in and submits the same origin, one after the other
    for submitter in submitters:
        api_client.force_login(submitter)
        check_api_post_response(api_client, url, status_code=200)

    # exactly one save request must reference each user id
    for submitter in submitters:
        assert SaveOriginRequest.objects.get(user_ids__contains=f'"{submitter.id}"')
@pytest.mark.parametrize(
    "origin_url",
    [
        "https://anonymous:anonymous@git.example.org/user/repo",
        "https://anonymous:@git.example.org/user/repo",
    ],
)
def test_accept_origin_url_with_anonymous_credentials(
    api_client, swh_scheduler, origin_url
):
    """Check that origin URLs carrying "anonymous" credentials get a 200
    response when posted to the save origin endpoint.
    """
    save_url_args = {"visit_type": "git", "origin_url": origin_url}
    url = reverse("api-1-save-origin", url_args=save_url_args)
    check_api_post_responses(api_client, url, status_code=200)
@pytest.mark.django_db
def test_save_origin_requests_list(client, mocker):
    """Check pagination and search filtering of the save requests listing.

    Ten accepted requests are created for each visit type in VISIT_TYPES.
    The listing is then fetched page by page (one page per visit type,
    newest first), and once more per visit type using the visit type as
    search value.
    """
    nb_origins_per_type = 10
    nb_requests_total = len(VISIT_TYPES) * nb_origins_per_type

    for visit_type in VISIT_TYPES:
        for i in range(nb_origins_per_type):
            SaveOriginRequest.objects.create(
                request_date=datetime.now(tz=timezone.utc),
                visit_type=visit_type,
                origin_url=f"https://{visit_type}.example.org/project{i}",
                status=SAVE_REQUEST_ACCEPTED,
                visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1),
                loading_task_id=i,
                loading_task_status=SAVE_TASK_SUCCEEDED,
            )

    mock_scheduler = mocker.patch("swh.web.save_code_now.origin_save.scheduler")
    mock_scheduler.get_tasks.return_value = []
    mock_scheduler.get_task_runs.return_value = []

    def fetch_requests(draw, search_value, start):
        # Query the listing endpoint (sorted by descending request date,
        # one visit type worth of rows per page) and return the decoded JSON.
        list_url = reverse(
            "origin-save-requests-list",
            url_args={"status": "all"},
            query_params={
                "draw": draw,
                "search[value]": search_value,
                "order[0][column]": "0",
                "columns[0][name]": "request_date",
                "order[0][dir]": "desc",
                "length": nb_origins_per_type,
                "start": start,
            },
        )
        list_resp = check_http_get_response(
            client, list_url, status_code=200, content_type="application/json"
        )
        return json.loads(list_resp.content.decode("utf-8"))

    # retrieve all save requests page by page, sorted in descending order
    # of request creation: each page must hold a single visit type
    for draw, visit_type in enumerate(reversed(VISIT_TYPES), start=1):
        sors = fetch_requests(
            draw, search_value="", start=(draw - 1) * nb_origins_per_type
        )
        assert sors["draw"] == draw
        assert sors["recordsFiltered"] == nb_requests_total
        assert sors["recordsTotal"] == nb_requests_total
        assert len(sors["data"]) == nb_origins_per_type
        assert all(d["visit_type"] == visit_type for d in sors["data"])

    # retrieve save requests filtered by visit type in a single page
    for draw, visit_type in enumerate(reversed(VISIT_TYPES), start=1):
        sors = fetch_requests(draw, search_value=visit_type, start=0)
        assert sors["draw"] == draw
        assert sors["recordsFiltered"] == nb_origins_per_type
        assert sors["recordsTotal"] == nb_requests_total
        assert len(sors["data"]) == nb_origins_per_type
        assert all(d["visit_type"] == visit_type for d in sors["data"])
class Command(BaseCommand):
    """Management command relabelling a Django application in the database.

    Rows of the ``django_content_type`` and ``django_migrations`` tables that
    reference the old application label are updated to reference the new one.
    """

    help = (
        "Renames a Django Application. Usage rename_app [old_app_name] [new_app_name]"
    )

    def add_arguments(self, parser):
        """Declare the two positional arguments of the command.

        Both use ``nargs=1`` and are therefore received by :meth:`handle`
        as one-element lists.
        """
        parser.add_argument("old_app_name", nargs=1, type=str)
        parser.add_argument("new_app_name", nargs=1, type=str)

    def handle(self, old_app_name, new_app_name, *args, **options):
        """Rename ``old_app_name`` to ``new_app_name`` in Django's tables.

        Args:
            old_app_name: one-element list holding the current app label
            new_app_name: one-element list holding the new app label

        Returns:
            None in all cases. Nothing is modified when content types
            already exist for the new label. Errors raised while querying or
            updating the tables are logged and not propagated.
        """
        # Imported locally so that the block does not depend on a change of
        # the module level imports.
        from django.db import transaction

        old_app_name = old_app_name[0]
        new_app_name = new_app_name[0]

        with connection.cursor() as cursor:
            try:
                # Both tables must be updated together: if only the content
                # types were renamed, a later run would consider the renaming
                # done and leave django_migrations referencing the old label.
                with transaction.atomic():
                    # App labels come from the command line: bind them as
                    # query parameters instead of formatting them into the SQL.
                    cursor.execute(
                        "SELECT * FROM django_content_type WHERE app_label=%s",
                        [new_app_name],
                    )
                    has_already_been_ran = cursor.fetchone()
                    if has_already_been_ran:
                        logger.info(
                            "Renaming app %s to %s has already been done, exiting "
                            "without making any changes",
                            old_app_name,
                            new_app_name,
                        )
                        return None

                    cursor.execute(
                        "UPDATE django_content_type SET app_label=%s "
                        "WHERE app_label=%s",
                        [new_app_name, old_app_name],
                    )
                    cursor.execute(
                        "UPDATE django_migrations SET app=%s WHERE app=%s",
                        [new_app_name, old_app_name],
                    )
            except Exception as e:
                # Best effort: the failure is only reported in the logs.
                # NOTE(review): the Makefile runs this command right before
                # `migrate`; presumably errors are swallowed so that a
                # database without these tables does not block it - confirm.
                logger.info(
                    "Error while trying to rename app %s to %s: %s",
                    old_app_name,
                    new_app_name,
                    str(e),
                )
                return None