diff --git a/Makefile.local b/Makefile.local
index d6998d37..988d89e5 100644
--- a/Makefile.local
+++ b/Makefile.local
@@ -1,125 +1,127 @@
# Directory handed to the test runner by the test-full target.
TEST_DIRS := ./swh/web/tests
# Hypothesis profiles: TESTFULL_FLAGS is used by test-full below; TESTFLAGS is
# presumably consumed by a default test rule defined outside this file (confirm).
TESTFLAGS = --hypothesis-profile=swh-web-fast
TESTFULL_FLAGS = --hypothesis-profile=swh-web
# yarn executable, overridable from the environment or the command line.
YARN ?= yarn
# Django settings modules passed through --settings / DJANGO_SETTINGS_MODULE.
# The test and development ones can be overridden, the production one is fixed.
SETTINGS_TEST ?= swh.web.settings.tests
SETTINGS_DEV ?= swh.web.settings.development
SETTINGS_PROD = swh.web.settings.production
# Install the JavaScript dependencies exactly as pinned in the lockfile.
# The recipe never creates a file named "yarn-install", so the target is
# declared phony: a stray file with that name must not silence the install.
.PHONY: yarn-install
yarn-install: package.json
	$(YARN) install --frozen-lockfile
# Webpack bundle builds. Every variant first installs the yarn dependencies
# and then runs the matching yarn script.
.PHONY: build-webpack-dev build-webpack-test
.PHONY: build-webpack-dev-no-verbose build-webpack-prod

# development bundle
build-webpack-dev: yarn-install
	$(YARN) build-dev

# bundle used by the frontend tests
build-webpack-test: yarn-install
	$(YARN) build-test

# development bundle, standard output discarded
build-webpack-dev-no-verbose: yarn-install
	$(YARN) build-dev > /dev/null

# production bundle
build-webpack-prod: yarn-install
	$(YARN) build
# Database migration targets, one per Django settings module.
# The dev and prod variants first run the rename_app management command
# (swh_web_common -> swh_web_save_code_now) and then apply the migrations.
# dev goes through swh/web/manage.py, prod through django-admin.
.PHONY: run-migrations-dev
run-migrations-dev:
+ python3 swh/web/manage.py rename_app --settings=$(SETTINGS_DEV) swh_web_common swh_web_save_code_now
python3 swh/web/manage.py migrate --settings=$(SETTINGS_DEV) -v0
.PHONY: run-migrations-prod
run-migrations-prod:
+ django-admin rename_app --settings=$(SETTINGS_PROD) swh_web_common swh_web_save_code_now
django-admin migrate --settings=$(SETTINGS_PROD) -v0
# The test variant removes swh-web-test.sqlite3 before migrating, so the
# migrations are applied without that previous database file.
.PHONY: run-migrations-test
run-migrations-test:
rm -f swh-web-test.sqlite3
django-admin migrate --settings=$(SETTINGS_TEST) -v0
# Create the test admin and test users in the database selected by each
# settings module, after the matching migrations have been applied.
# The scripts are fed to the Django shell through an input redirection rather
# than "cat file | ...": a missing script then fails the recipe instead of
# being hidden behind the exit status of the last pipeline command.
# None of these targets creates a file, hence the phony declaration.
.PHONY: add-users-test add-users-dev add-users-prod

add-users-test: run-migrations-test
	django-admin shell --settings=$(SETTINGS_TEST) < swh/web/tests/create_test_admin.py
	django-admin shell --settings=$(SETTINGS_TEST) < swh/web/tests/create_test_users.py

add-users-dev: run-migrations-dev
	django-admin shell --settings=$(SETTINGS_DEV) < swh/web/tests/create_test_admin.py
	django-admin shell --settings=$(SETTINGS_DEV) < swh/web/tests/create_test_users.py

add-users-prod: run-migrations-prod
	django-admin shell --settings=$(SETTINGS_PROD) < swh/web/tests/create_test_admin.py
	django-admin shell --settings=$(SETTINGS_PROD) < swh/web/tests/create_test_users.py
# Send the "flush_all" command to the memcached instance on localhost:11211;
# error output of nc is discarded.
.PHONY: clear-memcached
clear-memcached:
	printf 'flush_all\n' | nc -q 2 localhost 11211 2>/dev/null
# Development server with live webpack rebuilds.
# After seeding the dev users and installing yarn dependencies, a single bash
# process starts "$(YARN) start-dev" in the background, waits 10 seconds, then
# runs the Django runserver command from swh/web with the dev settings.
# The trap fires on SIGINT, SIGTERM, ERR and EXIT: it resets itself and kills
# the processes sharing the process group id reported by ps.
# "$$$$" is written so that the shell receives "$$" once make has expanded it.
run-django-webpack-devserver: add-users-dev yarn-install
bash -c "trap 'trap - SIGINT SIGTERM ERR EXIT && \
# ensure all child processes will be killed by PGID when exiting \
ps -o pgid= $$$$ | grep -o [0-9]* | xargs pkill -g' SIGINT SIGTERM ERR EXIT; \
$(YARN) start-dev & sleep 10 && cd swh/web && \
python3 manage.py runserver --nostatic --settings=$(SETTINGS_DEV) || exit 1"
# Server launchers. None of them produces a file, so they are declared phony
# like the other command-style targets of this file; otherwise a file named
# after one of them would make the target look up to date.
.PHONY: run-django-webpack-devserver run-django-webpack-dev run-django-webpack-prod
.PHONY: run-django-server-dev run-django-server-prod run-gunicorn-server
.PHONY: run-django-webpack-memory-storages

# Django dev server, dev settings, after rebuilding the dev webpack bundle.
run-django-webpack-dev: build-webpack-dev add-users-dev
	python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_DEV)

# Django dev server, production settings, after rebuilding the production
# bundle and flushing memcached.
run-django-webpack-prod: build-webpack-prod add-users-prod clear-memcached
	python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_PROD)

# Django dev server, dev settings, without rebuilding the bundle.
run-django-server-dev: add-users-dev
	python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_DEV)

# Django dev server, production settings, without rebuilding the bundle.
run-django-server-prod: add-users-prod clear-memcached
	python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_PROD)

# gunicorn bound to 127.0.0.1:5004 (2 workers, 2 threads) with the production
# settings exported through DJANGO_SETTINGS_MODULE.
run-gunicorn-server: add-users-prod clear-memcached
	DJANGO_SETTINGS_MODULE=$(SETTINGS_PROD) \
	gunicorn --bind 127.0.0.1:5004 \
		--threads 2 \
		--workers 2 'django.core.wsgi:get_wsgi_application()'

# Django dev server with the test settings and the test users.
run-django-webpack-memory-storages: build-webpack-dev add-users-test
	python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_TEST)
# Run the test suite of $(TEST_DIRS) with the full hypothesis profile.
# $(TEST) is not defined in the visible part of this file; it is expected to
# come from the including Makefile.
# Declared phony since the recipe creates no "test-full" file.
.PHONY: test-full
test-full:
	$(TEST) $(TESTFULL_FLAGS) $(TEST_DIRS)
# Headless frontend test run.
# Prerequisites build the test webpack bundle and seed the test users. The
# recipe then starts the Django runserver command with the test settings in
# the background, waits 10 seconds, runs Cypress, and finally runs the
# "mochawesome" and "nyc-report" yarn scripts.
# The trap (SIGINT, SIGTERM, ERR, EXIT) resets itself and kills the background
# jobs listed by "jobs -p".
.PHONY: test-frontend-cmd
test-frontend-cmd: build-webpack-test add-users-test
bash -c "trap 'trap - SIGINT SIGTERM ERR EXIT && \
jobs -p | xargs -r kill' SIGINT SIGTERM ERR EXIT; \
python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_TEST) & \
sleep 10 && $(YARN) run cypress run --config numTestsKeptInMemory=0 && \
$(YARN) mochawesome && $(YARN) nyc-report"
# Entry points: both run test-frontend-cmd and differ only in the exported
# value of CYPRESS_SKIP_SLOW_TESTS (1 for test-frontend, 0 for the full run).
test-frontend: export CYPRESS_SKIP_SLOW_TESTS=1
test-frontend: test-frontend-cmd
test-frontend-full: export CYPRESS_SKIP_SLOW_TESTS=0
test-frontend-full: test-frontend-cmd
# Interactive frontend test run.
# After seeding the test users and installing yarn dependencies, the recipe
# starts "$(YARN) start-dev" and the Django runserver command (test settings)
# in the background, waits 10 seconds and opens the Cypress UI.
.PHONY: test-frontend-ui-cmd
test-frontend-ui-cmd: add-users-test yarn-install
# ensure all child processes will be killed when hitting Ctrl-C in terminal
# or manually closing the Cypress UI window, killing by PGID seems the only
# reliable way to do it in that case
bash -c "trap 'trap - SIGINT SIGTERM ERR EXIT && \
ps -o pgid= $$$$ | grep -o [0-9]* | xargs pkill -g' SIGINT SIGTERM ERR EXIT; \
$(YARN) start-dev & \
python3 swh/web/manage.py runserver --nostatic --settings=$(SETTINGS_TEST) & \
sleep 10 && $(YARN) run cypress open"
# Entry points: both run test-frontend-ui-cmd and differ only in the exported
# value of CYPRESS_SKIP_SLOW_TESTS (1 for test-frontend-ui, 0 for the full run).
test-frontend-ui: export CYPRESS_SKIP_SLOW_TESTS=1
test-frontend-ui: test-frontend-ui-cmd
test-frontend-full-ui: export CYPRESS_SKIP_SLOW_TESTS=0
test-frontend-full-ui: test-frontend-ui-cmd
# Override default rule to make sure DJANGO env var is properly set. It
# *should* work without any override thanks to the mypy django-stubs plugin,
# but it currently doesn't; see
# https://github.com/typeddjango/django-stubs/issues/166
# The recipe type-checks the "swh" directory with DJANGO_SETTINGS_MODULE set to
# the development settings. $(MYPY) and $(MYPYFLAGS) are not defined in the
# visible part of this file.
check-mypy:
DJANGO_SETTINGS_MODULE=$(SETTINGS_DEV) $(MYPY) $(MYPYFLAGS) swh
diff --git a/swh/web/admin/urls.py b/swh/web/admin/urls.py
index dc8243b1..4ef703a1 100644
--- a/swh/web/admin/urls.py
+++ b/swh/web/admin/urls.py
@@ -1,28 +1,27 @@
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.contrib.auth.views import LoginView
from django.shortcuts import redirect
from django.urls import re_path as url
from swh.web.admin.adminurls import AdminUrls
import swh.web.admin.deposit # noqa
-import swh.web.admin.origin_save # noqa
from swh.web.config import is_feature_enabled
if is_feature_enabled("add_forge_now"):
import swh.web.admin.add_forge_now # noqa
def _admin_default_view(request):
    """Redirect the admin root URL to the view named
    ``admin-origin-save-requests``."""
    landing_view_name = "admin-origin-save-requests"
    return redirect(landing_view_name)
# URL patterns of the admin application: the default landing view and the
# login page, followed by the patterns returned by AdminUrls.
urlpatterns = [
url(r"^$", _admin_default_view, name="admin"),
url(r"^login/$", LoginView.as_view(template_name="login.html"), name="login"),
]
urlpatterns += AdminUrls.get_url_patterns()
diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py
index 4124955a..04297017 100644
--- a/swh/web/api/urls.py
+++ b/swh/web/api/urls.py
@@ -1,23 +1,23 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
+
from swh.web.api.apiurls import APIUrls
import swh.web.api.views.add_forge_now # noqa
import swh.web.api.views.content # noqa
import swh.web.api.views.directory # noqa
import swh.web.api.views.graph # noqa
import swh.web.api.views.identifiers # noqa
import swh.web.api.views.metadata # noqa
import swh.web.api.views.origin # noqa
-import swh.web.api.views.origin_save # noqa
import swh.web.api.views.ping # noqa
import swh.web.api.views.raw # noqa
import swh.web.api.views.release # noqa
import swh.web.api.views.revision # noqa
import swh.web.api.views.snapshot # noqa
import swh.web.api.views.stat # noqa
import swh.web.api.views.vault # noqa
urlpatterns = APIUrls.get_url_patterns()
diff --git a/swh/web/common/__init__.py b/swh/web/common/__init__.py
index 80eb6395..e69de29b 100644
--- a/swh/web/common/__init__.py
+++ b/swh/web/common/__init__.py
@@ -1,6 +0,0 @@
-# Copyright (C) 2018 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU Affero General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-default_app_config = "swh.web.common.apps.SwhWebCommonConfig"
diff --git a/swh/web/common/swh_templatetags.py b/swh/web/common/swh_templatetags.py
index 66deffae..2c54ae54 100644
--- a/swh/web/common/swh_templatetags.py
+++ b/swh/web/common/swh_templatetags.py
@@ -1,149 +1,149 @@
# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import re
from django import template
from django.utils.safestring import mark_safe
from swh.web.common.converters import SWHDjangoJSONEncoder
-from swh.web.common.origin_save import get_savable_visit_types
from swh.web.common.utils import rst_to_html
+from swh.web.save_code_now.origin_save import get_savable_visit_types
register = template.Library()
@register.filter
def docstring_display(docstring):
    """
    Template filter converting a reST-formatted docstring to HTML for
    display in the browsable api.
    """
    rendered_html = rst_to_html(docstring)
    return rendered_html
@register.filter
def urlize_links_and_mails(text):
    """Utility function for decorating api links in browsable api.

    Args:
        text: whose content matching links should be transformed into
            contextual API or Browse html links.

    Returns
        The text with every ``http...`` link wrapped in an html anchor and
        every e-mail address wrapped in a ``mailto:`` anchor.
        The text as is when it already contains an ``href="`` attribute.

    """
    # text that already carries anchors must not be decorated a second time
    if 'href="' in text:
        return text
    # The back-reference must be emitted inside an anchor: replacing a match
    # by a bare "\1" leaves the text unchanged and no link is ever produced.
    text = re.sub(r"(http.*)", r'<a href="\1">\1</a>', text)
    return re.sub(r'([^ <>"]+@[^ <>"]+)', r'<a href="mailto:\1">\1</a>', text)
@register.filter
def urlize_header_links(text):
    """Utility function for decorating headers links in browsable api.

    Args
        text: Text whose content contains Link header value

    Returns:
        The text transformed with html link if any link is found, one link
        per line.
        The text as is otherwise.

    """
    # Each `<url>; rel="..."` entry gets its URL wrapped in an anchor and is
    # terminated by a newline; the comma separating two entries is dropped.
    ret = re.sub(
        r'<(http[^<>]+)>; rel="([^,]+)"', r'<<a href="\1">\1</a>>; rel="\2"\n', text
    ).replace("\n,", "\n")
    # Only remove the newline appended after the last entry: slicing
    # unconditionally would chop a real character off a text without links.
    return ret[:-1] if ret.endswith("\n") else ret
@register.filter
def jsonify(obj):
    """Serialize a django template variable to JSON so that it can be
    embedded in script tags.

    Args
        obj: Any django template context variable

    Returns:
        The JSON text of the variable, encoded with SWHDjangoJSONEncoder and
        marked safe for template output.

    """
    serialized = json.dumps(obj, cls=SWHDjangoJSONEncoder)
    return mark_safe(serialized)
@register.filter
def sub(value, arg):
    """Template filter computing the difference of two numbers.

    Args:
        value (int/float): the value to subtract from
        arg (int/float): the value to subtract

    Returns:
        int/float: ``value`` minus ``arg``

    """
    difference = value - arg
    return difference
@register.filter
def mul(value, arg):
    """Template filter computing the product of two numbers.

    Args:
        value (int/float): the first factor
        arg (int/float): the second factor

    Returns:
        int/float: ``value`` times ``arg``

    """
    product = value * arg
    return product
@register.filter
def key_value(dict, key):
    """Template filter looking up a key in a dictionary.

    Args:
        dict (dict): the dictionary to read
        key (str): the key whose value is wanted

    Returns:
        The value stored under ``key`` (a missing key raises as with any
        subscript access)

    """
    looked_up = dict[key]
    return looked_up
@register.filter
def visit_type_savable(visit_type: str) -> bool:
    """Template filter telling whether a save request can be created
    for a given visit type.

    Args:
        visit_type: the type of visit

    Returns:
        Whether the visit type is among those returned by
        get_savable_visit_types()

    """
    savable_visit_types = get_savable_visit_types()
    return visit_type in savable_visit_types
@register.filter
def split(value, arg):
    """Template filter splitting a string on a separator.

    Args:
        value (str): the string to split
        arg (str): the separator

    Returns:
        list: the parts of ``value`` found between occurrences of ``arg``

    """
    parts = value.split(arg)
    return parts
diff --git a/swh/web/config.py b/swh/web/config.py
index dc8423e9..111455eb 100644
--- a/swh/web/config.py
+++ b/swh/web/config.py
@@ -1,241 +1,242 @@
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from typing import Any, Dict
from swh.core import config
from swh.counters import get_counters
from swh.indexer.storage import get_indexer_storage
from swh.scheduler import get_scheduler
from swh.search import get_search
from swh.storage import get_storage
from swh.vault import get_vault
from swh.web import settings
SWH_WEB_SERVER_NAME = "archive.softwareheritage.org"
SWH_WEB_INTERNAL_SERVER_NAME = "archive.internal.softwareheritage.org"
SWH_WEB_STAGING_SERVER_NAMES = [
"webapp.staging.swh.network",
"webapp.internal.staging.swh.network",
]
SETTINGS_DIR = os.path.dirname(settings.__file__)
DEFAULT_CONFIG = {
"allowed_hosts": ("list", []),
"storage": (
"dict",
{
"cls": "remote",
"url": "http://127.0.0.1:5002/",
"timeout": 10,
},
),
"indexer_storage": (
"dict",
{
"cls": "remote",
"url": "http://127.0.0.1:5007/",
"timeout": 1,
},
),
"counters": (
"dict",
{
"cls": "remote",
"url": "http://127.0.0.1:5011/",
"timeout": 1,
},
),
"search": (
"dict",
{
"cls": "remote",
"url": "http://127.0.0.1:5010/",
"timeout": 10,
},
),
"search_config": (
"dict",
{
"metadata_backend": "swh-indexer-storage",
}, # or "swh-search"
),
"log_dir": ("string", "/tmp/swh/log"),
"debug": ("bool", False),
"serve_assets": ("bool", False),
"host": ("string", "127.0.0.1"),
"port": ("int", 5004),
"secret_key": ("string", "development key"),
# do not display code highlighting for content > 5MB
"content_display_max_size": ("int", 5 * 1024 * 1024),
"snapshot_content_max_size": ("int", 1000),
"throttling": (
"dict",
{
"cache_uri": None, # production: memcached as cache (127.0.0.1:11211)
# development: in-memory cache so None
"scopes": {
"swh_api": {
"limiter_rate": {"default": "120/h"},
"exempted_networks": ["127.0.0.0/8"],
},
"swh_api_origin_search": {
"limiter_rate": {"default": "10/m"},
"exempted_networks": ["127.0.0.0/8"],
},
"swh_vault_cooking": {
"limiter_rate": {"default": "120/h", "GET": "60/m"},
"exempted_networks": ["127.0.0.0/8"],
},
"swh_save_origin": {
"limiter_rate": {"default": "120/h", "POST": "10/h"},
"exempted_networks": ["127.0.0.0/8"],
},
"swh_api_origin_visit_latest": {
"limiter_rate": {"default": "700/m"},
"exempted_networks": ["127.0.0.0/8"],
},
},
},
),
"vault": (
"dict",
{
"cls": "remote",
"args": {
"url": "http://127.0.0.1:5005/",
},
},
),
"scheduler": ("dict", {"cls": "remote", "url": "http://127.0.0.1:5008/"}),
"development_db": ("string", os.path.join(SETTINGS_DIR, "db.sqlite3")),
"test_db": ("dict", {"name": "swh-web-test"}),
"production_db": ("dict", {"name": "swh-web"}),
"deposit": (
"dict",
{
"private_api_url": "https://deposit.softwareheritage.org/1/private/",
"private_api_user": "swhworker",
"private_api_password": "some-password",
},
),
"e2e_tests_mode": ("bool", False),
"es_workers_index_url": ("string", ""),
"history_counters_url": (
"string",
(
"http://counters1.internal.softwareheritage.org:5011"
"/counters_history/history.json"
),
),
"client_config": ("dict", {}),
"keycloak": ("dict", {"server_url": "", "realm_name": ""}),
"graph": (
"dict",
{
"server_url": "http://graph.internal.softwareheritage.org:5009/graph/",
"max_edges": {"staff": 0, "user": 100000, "anonymous": 1000},
},
),
"status": (
"dict",
{
"server_url": "https://status.softwareheritage.org/",
"json_path": "1.0/status/578e5eddcdc0cc7951000520",
},
),
"counters_backend": ("string", "swh-storage"), # or "swh-counters"
"staging_server_names": ("list", SWH_WEB_STAGING_SERVER_NAMES),
"instance_name": ("str", "archive-test.softwareheritage.org"),
"give": ("dict", {"public_key": "", "token": ""}),
"features": ("dict", {"add_forge_now": True}),
"add_forge_now": ("dict", {"email_address": "add-forge-now@example.com"}),
"swh_extra_django_apps": (
"list",
[
"swh.web.inbound_email",
"swh.web.add_forge_now",
"swh.web.mailmap",
+ "swh.web.save_code_now",
],
),
}
swhweb_config: Dict[str, Any] = {}
def get_config(config_file="web/web"):
    """Load the web application configuration, once, and return it.

    The file named by the SWH_CONFIG_FILENAME environment variable, when that
    variable is set, is read instead of `config_file`. Values missing from the
    file come from DEFAULT_CONFIG.

    On the first call the log directory is prepared and the "search",
    "storage", "vault", "indexer_storage", "scheduler" and "counters" entries
    are replaced by the objects built from their configuration ("search" is
    set to None when it has no configuration). Later calls return the same
    module-level dict without reading anything again.
    """
    # configuration already loaded by a previous call
    if swhweb_config:
        return swhweb_config

    env_config_file = os.environ.get("SWH_CONFIG_FILENAME")
    if env_config_file:
        config_file = env_config_file

    swhweb_config.update(config.load_named_config(config_file, DEFAULT_CONFIG))
    config.prepare_folders(swhweb_config, "log_dir")

    search_args = swhweb_config.get("search")
    swhweb_config["search"] = get_search(**search_args) if search_args else None

    # instantiate the remaining services in the same order as before
    service_factories = (
        ("storage", get_storage),
        ("vault", get_vault),
        ("indexer_storage", get_indexer_storage),
        ("scheduler", get_scheduler),
        ("counters", get_counters),
    )
    for service_name, factory in service_factories:
        swhweb_config[service_name] = factory(**swhweb_config[service_name])

    return swhweb_config
def search():
    """Give access to the "search" entry of the application configuration."""
    app_config = get_config()
    return app_config["search"]
def storage():
    """Give access to the "storage" entry of the application configuration."""
    app_config = get_config()
    return app_config["storage"]
def vault():
    """Give access to the "vault" entry of the application configuration."""
    app_config = get_config()
    return app_config["vault"]
def indexer_storage():
    """Give access to the "indexer_storage" entry of the application
    configuration."""
    app_config = get_config()
    return app_config["indexer_storage"]
def scheduler():
    """Give access to the "scheduler" entry of the application configuration."""
    app_config = get_config()
    return app_config["scheduler"]
def counters():
    """Give access to the "counters" entry of the application configuration."""
    app_config = get_config()
    return app_config["counters"]
def is_feature_enabled(feature_name: str) -> bool:
    """Tell whether a feature is switched on in the "features" section of the
    configuration. A feature missing from that section counts as disabled.
    """
    features = get_config()["features"]
    return features.get(feature_name, False)
diff --git a/swh/web/misc/metrics.py b/swh/web/misc/metrics.py
index 8ee7d39d..95764aad 100644
--- a/swh/web/misc/metrics.py
+++ b/swh/web/misc/metrics.py
@@ -1,21 +1,21 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from prometheus_client.exposition import CONTENT_TYPE_LATEST, generate_latest
from django.http import HttpResponse
-from swh.web.common.origin_save import compute_save_requests_metrics
from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY
+from swh.web.save_code_now.origin_save import compute_save_requests_metrics
def prometheus_metrics(request):
    """Refresh the save requests metrics, then expose the content of
    SWH_WEB_METRICS_REGISTRY in the Prometheus exposition format."""
    compute_save_requests_metrics()
    metrics_payload = generate_latest(registry=SWH_WEB_METRICS_REGISTRY)
    return HttpResponse(content=metrics_payload, content_type=CONTENT_TYPE_LATEST)
diff --git a/swh/web/misc/urls.py b/swh/web/misc/urls.py
index c9b673f8..683258c2 100644
--- a/swh/web/misc/urls.py
+++ b/swh/web/misc/urls.py
@@ -1,121 +1,120 @@
# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import requests
from django.conf.urls import include
from django.contrib.staticfiles import finders
from django.http import JsonResponse
from django.shortcuts import render
from django.urls import re_path as url
from django.views.decorators.clickjacking import xframe_options_exempt
from swh.web.common import archive
from swh.web.common.exc import sentry_capture_exception
from swh.web.config import get_config
from swh.web.misc.metrics import prometheus_metrics
def _jslicenses(request):
    """Render the page listing the licenses of the bundled JavaScript files.

    The data comes from the ``jssources/jslicenses.json`` static file and is
    handed to the template as a list of (path, value) pairs sorted by the
    last component of the path.
    """
    jslicenses_file = finders.find("jssources/jslicenses.json")
    # the context manager closes the file handle, which a bare open() call
    # passed directly to json.load never did
    with open(jslicenses_file) as jslicenses_fd:
        jslicenses_data = json.load(jslicenses_fd)
    jslicenses_data = sorted(
        jslicenses_data.items(), key=lambda item: item[0].split("/")[-1]
    )
    return render(request, "misc/jslicenses.html", {"jslicenses_data": jslicenses_data})
def _stat_counters(request):
    """Return, as JSON, the archive counters along with their history.

    The history is downloaded from the ``history_counters_url`` configuration
    entry with a 5 seconds timeout. Any failure while fetching or decoding it
    is reported to sentry and an empty history is returned instead.
    """
    current_counters = archive.stat_counters()
    history_url = get_config()["history_counters_url"]
    try:
        history = json.loads(requests.get(history_url, timeout=5).text)
    except Exception as exc:
        sentry_capture_exception(exc)
        history = {}
    return JsonResponse(
        {
            "stat_counters": current_counters,
            "stat_counters_history": history,
        }
    )
@xframe_options_exempt
def hiring_banner(request):
    """Render the hiring banner iframe in the language given by the ``lang``
    query parameter, "en" when it is missing or empty."""
    context = {"lang": request.GET.get("lang") or "en"}
    return render(request, "misc/hiring-banner-iframe.html", context)
# URL patterns of the misc application: views defined in this module plus the
# patterns of the included swh.web.misc.* modules. The
# swh.web.misc.origin_save include is removed by this change.
urlpatterns = [
url(r"^", include("swh.web.misc.coverage")),
url(r"^jslicenses/$", _jslicenses, name="jslicenses"),
- url(r"^", include("swh.web.misc.origin_save")),
url(r"^stat_counters/$", _stat_counters, name="stat-counters"),
url(r"^", include("swh.web.misc.badges")),
url(r"^metrics/prometheus/$", prometheus_metrics, name="metrics-prometheus"),
url(r"^", include("swh.web.misc.iframe")),
url(r"^", include("swh.web.misc.fundraising")),
url(r"^hiring/banner/$", hiring_banner, name="swh-hiring-banner"),
]
# when running end to end tests through cypress, declare some extra
# endpoints to provide input data for some of those tests
if get_config()["e2e_tests_mode"]:
    from swh.web.tests.views import (
        get_content_code_data_all_exts,
        get_content_code_data_all_filenames,
        get_content_code_data_by_ext,
        get_content_code_data_by_filename,
        get_content_other_data_by_ext,
    )

    # "(?P.+)" is not a valid regular expression: a named group needs its
    # "<name>" part, otherwise the pattern cannot be compiled.
    # NOTE(review): the group names below must match the keyword arguments of
    # the views in swh.web.tests.views, which are not visible here — confirm.
    urlpatterns.append(
        url(
            r"^tests/data/content/code/extension/(?P<ext>.+)/$",
            get_content_code_data_by_ext,
            name="tests-content-code-extension",
        )
    )
    urlpatterns.append(
        url(
            r"^tests/data/content/other/extension/(?P<ext>.+)/$",
            get_content_other_data_by_ext,
            name="tests-content-other-extension",
        )
    )
    urlpatterns.append(
        url(
            r"^tests/data/content/code/extensions/$",
            get_content_code_data_all_exts,
            name="tests-content-code-extensions",
        )
    )
    urlpatterns.append(
        url(
            r"^tests/data/content/code/filename/(?P<filename>.+)/$",
            get_content_code_data_by_filename,
            name="tests-content-code-filename",
        )
    )
    urlpatterns.append(
        url(
            r"^tests/data/content/code/filenames/$",
            get_content_code_data_all_filenames,
            name="tests-content-code-filenames",
        )
    )
diff --git a/swh/web/common/__init__.py b/swh/web/save_code_now/__init__.py
similarity index 61%
copy from swh/web/common/__init__.py
copy to swh/web/save_code_now/__init__.py
index 80eb6395..0bff01a9 100644
--- a/swh/web/common/__init__.py
+++ b/swh/web/save_code_now/__init__.py
@@ -1,6 +1,6 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
-default_app_config = "swh.web.common.apps.SwhWebCommonConfig"
+default_app_config = "swh.web.save_code_now.apps.SaveCodeNowConfig"
diff --git a/swh/web/admin/origin_save.py b/swh/web/save_code_now/admin_views.py
similarity index 70%
rename from swh/web/admin/origin_save.py
rename to swh/web/save_code_now/admin_views.py
index 3b722e5f..13296335 100644
--- a/swh/web/admin/origin_save.py
+++ b/swh/web/save_code_now/admin_views.py
@@ -1,220 +1,181 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
from django.conf import settings
from django.contrib.admin.views.decorators import staff_member_required
from django.core.exceptions import ObjectDoesNotExist
from django.core.paginator import Paginator
from django.http import HttpResponse, JsonResponse
from django.shortcuts import render
from django.views.decorators.http import require_POST
-from swh.web.admin.adminurls import admin_route
-from swh.web.common.models import (
+from swh.web.save_code_now.models import (
SaveAuthorizedOrigin,
SaveOriginRequest,
SaveUnauthorizedOrigin,
)
-from swh.web.common.origin_save import (
+from swh.web.save_code_now.origin_save import (
SAVE_REQUEST_PENDING,
SAVE_REQUEST_REJECTED,
create_save_origin_request,
)
-@admin_route(r"origin/save/requests/", view_name="admin-origin-save-requests")
# Staff-only view rendering the admin page of save requests. The change makes
# the function public and points it to the flattened template path.
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_requests(request):
- return render(request, "admin/origin-save/requests.html")
+def admin_origin_save_requests(request):
+ return render(request, "admin/origin-save-requests.html")
-@admin_route(r"origin/save/filters/", view_name="admin-origin-save-filters")
# Staff-only view rendering the admin page of save filters. The change makes
# the function public and points it to the flattened template path.
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_filters(request):
- return render(request, "admin/origin-save/filters.html")
+def admin_origin_save_filters(request):
+ return render(request, "admin/origin-save-filters.html")
def _datatables_origin_urls_response(request, urls_query_set):
    """Build the JSON response for a table listing origin URLs.

    The filtering, ordering and paging parameters are read from the query
    string of the request (``search[value]``, ``order[0][column]``,
    ``columns[<n>][name]``, ``order[0][dir]``, ``draw``, ``start`` and
    ``length``).

    Args:
        request: the request carrying the table parameters
        urls_query_set: query set of objects exposing a ``url`` field

    Returns:
        A JsonResponse holding ``draw``, ``recordsTotal``, ``recordsFiltered``
        and ``data``, the latter being the requested page as a list of
        ``{"url": ...}`` dicts.
    """
    search_value = request.GET["search[value]"]
    if search_value:
        urls_query_set = urls_query_set.filter(url__icontains=search_value)

    column_order = request.GET["order[0][column]"]
    field_order = request.GET["columns[%s][name]" % column_order]
    order_dir = request.GET["order[0][dir]"]
    if order_dir == "desc":
        field_order = "-" + field_order
    urls_query_set = urls_query_set.order_by(field_order)

    # count once: both totals are computed on the same query set
    records_count = urls_query_set.count()
    table_data = {
        "draw": int(request.GET["draw"]),
        "recordsTotal": records_count,
        "recordsFiltered": records_count,
    }

    length = int(request.GET["length"])
    # integer division: a true division yields a float, which the paginator
    # rejects as soon as "start" is not a multiple of "length"
    page = int(request.GET["start"]) // length + 1
    paginator = Paginator(urls_query_set, length)
    table_data["data"] = [{"url": u.url} for u in paginator.page(page).object_list]
    return JsonResponse(table_data)
-@admin_route(
- r"origin/save/authorized_urls/list/",
- view_name="admin-origin-save-authorized-urls-list",
-)
# Staff-only view returning every SaveAuthorizedOrigin entry through
# _datatables_origin_urls_response. The change only makes the function public.
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_authorized_urls_list(request):
+def admin_origin_save_authorized_urls_list(request):
authorized_urls = SaveAuthorizedOrigin.objects.all()
return _datatables_origin_urls_response(request, authorized_urls)
-@admin_route(
- r"origin/save/authorized_urls/add/(?P.+)/",
- view_name="admin-origin-save-add-authorized-url",
-)
# Staff-only, POST-only view registering origin_url as an authorized URL.
# When no SaveAuthorizedOrigin entry exists for that URL, one is created and
# a save request is created for every pending SaveOriginRequest whose origin
# URL starts with it; the response status is then 200.
# The else branch presumably belongs to the try statement, so an already
# registered URL is answered with 400 (indentation is not visible here).
@require_POST
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_add_authorized_url(request, origin_url):
+def admin_origin_save_add_authorized_url(request, origin_url):
try:
SaveAuthorizedOrigin.objects.get(url=origin_url)
except ObjectDoesNotExist:
# add the new authorized url
SaveAuthorizedOrigin.objects.create(url=origin_url)
# check if pending save requests with that url prefix exist
pending_save_requests = SaveOriginRequest.objects.filter(
origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING
)
# create origin save tasks for previously pending requests
for psr in pending_save_requests:
create_save_origin_request(psr.visit_type, psr.origin_url)
status_code = 200
else:
status_code = 400
return HttpResponse(status=status_code)
-@admin_route(
- r"origin/save/authorized_urls/remove/(?P.+)/",
- view_name="admin-origin-save-remove-authorized-url",
-)
# Staff-only, POST-only view deleting the SaveAuthorizedOrigin entry whose URL
# is origin_url: 404 when no such entry exists, 200 once it is deleted.
@require_POST
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_remove_authorized_url(request, origin_url):
+def admin_origin_save_remove_authorized_url(request, origin_url):
try:
entry = SaveAuthorizedOrigin.objects.get(url=origin_url)
except ObjectDoesNotExist:
status_code = 404
else:
entry.delete()
status_code = 200
return HttpResponse(status=status_code)
-@admin_route(
- r"origin/save/unauthorized_urls/list/",
- view_name="admin-origin-save-unauthorized-urls-list",
-)
# Staff-only view returning every SaveUnauthorizedOrigin entry through
# _datatables_origin_urls_response. The change only makes the function public.
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_unauthorized_urls_list(request):
+def admin_origin_save_unauthorized_urls_list(request):
unauthorized_urls = SaveUnauthorizedOrigin.objects.all()
return _datatables_origin_urls_response(request, unauthorized_urls)
-@admin_route(
- r"origin/save/unauthorized_urls/add/(?P.+)/",
- view_name="admin-origin-save-add-unauthorized-url",
-)
# Staff-only, POST-only view registering origin_url as an unauthorized URL.
# When no SaveUnauthorizedOrigin entry exists for that URL, one is created and
# every pending SaveOriginRequest whose origin URL starts with it is marked
# as rejected; the response status is then 200.
# The else branch presumably belongs to the try statement, so an already
# registered URL is answered with 400 (indentation is not visible here).
@require_POST
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_add_unauthorized_url(request, origin_url):
+def admin_origin_save_add_unauthorized_url(request, origin_url):
try:
SaveUnauthorizedOrigin.objects.get(url=origin_url)
except ObjectDoesNotExist:
SaveUnauthorizedOrigin.objects.create(url=origin_url)
# check if pending save requests with that url prefix exist
pending_save_requests = SaveOriginRequest.objects.filter(
origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING
)
# mark pending requests as rejected
for psr in pending_save_requests:
psr.status = SAVE_REQUEST_REJECTED
psr.save()
status_code = 200
else:
status_code = 400
return HttpResponse(status=status_code)
-@admin_route(
- r"origin/save/unauthorized_urls/remove/(?P.+)/",
- view_name="admin-origin-save-remove-unauthorized-url",
-)
# Staff-only, POST-only view deleting the SaveUnauthorizedOrigin entry whose
# URL is origin_url: 404 when no such entry exists, 200 once it is deleted.
@require_POST
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_remove_unauthorized_url(request, origin_url):
+def admin_origin_save_remove_unauthorized_url(request, origin_url):
try:
entry = SaveUnauthorizedOrigin.objects.get(url=origin_url)
except ObjectDoesNotExist:
status_code = 404
else:
entry.delete()
status_code = 200
return HttpResponse(status=status_code)
-@admin_route(
- r"origin/save/request/accept/(?P.+)/url/(?P.+)/",
- view_name="admin-origin-save-request-accept",
-)
# Staff-only, POST-only view accepting a save request: origin_url is added to
# the authorized origins when it is not registered yet, a save request is
# created through create_save_origin_request, and 200 is returned.
# NOTE(review): indentation is not visible here, so whether the save request
# is created only for newly authorized URLs cannot be told — confirm.
@require_POST
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_request_accept(request, visit_type, origin_url):
+def admin_origin_save_request_accept(request, visit_type, origin_url):
try:
SaveAuthorizedOrigin.objects.get(url=origin_url)
except ObjectDoesNotExist:
SaveAuthorizedOrigin.objects.create(url=origin_url)
create_save_origin_request(visit_type, origin_url)
return HttpResponse(status=200)
-@admin_route(
- r"origin/save/request/reject/(?P.+)/url/(?P.+)/",
- view_name="admin-origin-save-request-reject",
-)
# Staff-only, POST-only view rejecting the pending save request matching
# visit_type and origin_url: 404 when there is no such pending request,
# otherwise the request is marked as rejected, its note is taken from the
# "note" key of the JSON request body, and 200 is returned.
@require_POST
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_request_reject(request, visit_type, origin_url):
+def admin_origin_save_request_reject(request, visit_type, origin_url):
try:
sor = SaveOriginRequest.objects.get(
visit_type=visit_type, origin_url=origin_url, status=SAVE_REQUEST_PENDING
)
except ObjectDoesNotExist:
status_code = 404
else:
status_code = 200
sor.status = SAVE_REQUEST_REJECTED
sor.note = json.loads(request.body).get("note")
sor.save()
return HttpResponse(status=status_code)
-@admin_route(
- r"origin/save/request/remove/(?P.+)/",
- view_name="admin-origin-save-request-remove",
-)
# Staff-only, POST-only view deleting the SaveOriginRequest whose id is
# sor_id: 404 when no such request exists, 200 once it is deleted.
@require_POST
@staff_member_required(view_func=None, login_url=settings.LOGIN_URL)
-def _admin_origin_save_request_remove(request, sor_id):
+def admin_origin_save_request_remove(request, sor_id):
try:
entry = SaveOriginRequest.objects.get(id=sor_id)
except ObjectDoesNotExist:
status_code = 404
else:
entry.delete()
status_code = 200
return HttpResponse(status=status_code)
diff --git a/swh/web/api/views/origin_save.py b/swh/web/save_code_now/api_views.py
similarity index 99%
rename from swh/web/api/views/origin_save.py
rename to swh/web/save_code_now/api_views.py
index 1c42e5ea..68d977aa 100644
--- a/swh/web/api/views/origin_save.py
+++ b/swh/web/save_code_now/api_views.py
@@ -1,127 +1,127 @@
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from typing import Optional, cast
from rest_framework.request import Request
from swh.web.api.apidoc import api_doc, format_docstring
from swh.web.api.apiurls import api_route
from swh.web.auth.utils import (
API_SAVE_ORIGIN_PERMISSION,
SWH_AMBASSADOR_PERMISSION,
privileged_user,
)
-from swh.web.common.origin_save import (
+from swh.web.save_code_now.origin_save import (
create_save_origin_request,
get_savable_visit_types,
get_save_origin_requests,
)
def _savable_visit_types() -> str:
    """Format the savable visit types for inclusion in the API documentation.

    Returns:
        The sorted visit types, each emphasized with ``**``, as an enumeration
        such as ``**a**, **b**, and **c**``. A single visit type is returned
        alone and no visit type gives an empty string. The empty string is
        also returned when DJANGO_SETTINGS_MODULE is ``swh.web.settings.tests``.
    """
    # visit types are not looked up when running with the test settings
    if os.environ.get("DJANGO_SETTINGS_MODULE") == "swh.web.settings.tests":
        return ""
    visit_types = [
        f"**{visit_type}**" for visit_type in sorted(get_savable_visit_types())
    ]
    # zero or one type: nothing to enumerate, and indexing the last element
    # of an empty list would raise IndexError
    if len(visit_types) < 2:
        return "".join(visit_types)
    return ", ".join(visit_types[:-1]) + f", and {visit_types[-1]}"
@api_route(
r"/origin/save/(?P<visit_type>.+)/url/(?P<origin_url>.+)/",
"api-1-save-origin",
methods=["GET", "POST"],
throttle_scope="swh_save_origin",
never_cache=True,
)
@api_doc("/origin/save/")
@format_docstring(visit_types=_savable_visit_types())
def api_save_origin(request: Request, visit_type: str, origin_url: str):
"""
.. http:get:: /api/1/origin/save/(visit_type)/url/(origin_url)/
.. http:post:: /api/1/origin/save/(visit_type)/url/(origin_url)/
Request the saving of a software origin into the archive
or check the status of previously created save requests.
This endpoint allows creating a saving task for a software origin
through a POST request.
Depending on the provided origin url, the save request can either be:
* immediately **accepted**, for well known code hosting providers
like for instance GitHub or GitLab
* **rejected**, in case the url is blacklisted by Software Heritage
* **put in pending state** until a manual check is done in order to
determine if it can be loaded or not
Once a saving request has been accepted, its associated saving task
status can then be checked through a GET request on the same url.
Returned status can either be:
* **not created**: no saving task has been created
* **not yet scheduled**: saving task has been created but its
execution has not yet been scheduled
* **scheduled**: the task execution has been scheduled
* **succeeded**: the saving task has been successfully executed
* **failed**: the saving task has been executed but it failed
When issuing a POST request an object will be returned while a GET
request will return an array of objects (as multiple save requests
might have been submitted for the same origin).
:param string visit_type: the type of visit to perform
(currently the supported types are {visit_types})
:param string origin_url: the url of the origin to save
{common_headers}
:>json string origin_url: the url of the origin to save
:>json string visit_type: the type of visit to perform
:>json string save_request_date: the date (in iso format) the save
request was issued
:>json string save_request_status: the status of the save request,
either **accepted**, **rejected** or **pending**
:>json string save_task_status: the status of the origin saving task,
either **not created**, **not yet scheduled**, **scheduled**,
**succeeded** or **failed**
:>json string visit_date: the date (in iso format) of the visit if a visit
occurred, null otherwise.
:>json string visit_status: the status of the visit, either **full**,
**partial**, **not_found** or **failed** if a visit occurred, null
otherwise.
:>json string note: optional note giving details about the save request,
for instance why it has been rejected
:statuscode 200: no error
:statuscode 400: an invalid visit type or origin url has been provided
:statuscode 403: the provided origin url is blacklisted
:statuscode 404: no save requests have been found for a given origin
"""
data = request.data or {}
if request.method == "POST":
sor = create_save_origin_request(
visit_type,
origin_url,
privileged_user(
request,
permissions=[SWH_AMBASSADOR_PERMISSION, API_SAVE_ORIGIN_PERMISSION],
),
user_id=cast(Optional[int], request.user.id),
**data,
)
del sor["id"]
return sor
else:
sors = get_save_origin_requests(visit_type, origin_url)
for sor in sors:
del sor["id"]
return sors
diff --git a/swh/web/common/apps.py b/swh/web/save_code_now/apps.py
similarity index 59%
rename from swh/web/common/apps.py
rename to swh/web/save_code_now/apps.py
index f1e7582e..ad2c49e9 100644
--- a/swh/web/common/apps.py
+++ b/swh/web/save_code_now/apps.py
@@ -1,11 +1,11 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.apps import AppConfig
-class SwhWebCommonConfig(AppConfig):
- name = "swh.web.common"
- label = "swh_web_common"
+class SaveCodeNowConfig(AppConfig):
+ name = "swh.web.save_code_now"
+ label = "swh_web_save_code_now"
diff --git a/swh/web/common/management/__init__.py b/swh/web/save_code_now/management/__init__.py
similarity index 100%
copy from swh/web/common/management/__init__.py
copy to swh/web/save_code_now/management/__init__.py
diff --git a/swh/web/common/management/commands/__init__.py b/swh/web/save_code_now/management/commands/__init__.py
similarity index 100%
rename from swh/web/common/management/commands/__init__.py
rename to swh/web/save_code_now/management/commands/__init__.py
diff --git a/swh/web/common/management/commands/refresh_savecodenow_statuses.py b/swh/web/save_code_now/management/commands/refresh_savecodenow_statuses.py
similarity index 93%
rename from swh/web/common/management/commands/refresh_savecodenow_statuses.py
rename to swh/web/save_code_now/management/commands/refresh_savecodenow_statuses.py
index e697d92d..f6b81897 100644
--- a/swh/web/common/management/commands/refresh_savecodenow_statuses.py
+++ b/swh/web/save_code_now/management/commands/refresh_savecodenow_statuses.py
@@ -1,63 +1,63 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Set
from django.core.management.base import BaseCommand
from swh.scheduler.model import ListedOrigin
-from swh.web.common.models import VISIT_STATUS_FULL, VISIT_STATUS_PARTIAL
-from swh.web.common.origin_save import refresh_save_origin_request_statuses
from swh.web.config import get_config
from swh.web.config import scheduler as get_scheduler
+from swh.web.save_code_now.models import VISIT_STATUS_FULL, VISIT_STATUS_PARTIAL
+from swh.web.save_code_now.origin_save import refresh_save_origin_request_statuses
class Command(BaseCommand):
help = "Refresh save code now origin request statuses periodically"
def handle(self, *args, **options):
"""Refresh origin save code now requests.
For the origin visit types, svn, git, hg, this also installs the origins as
recurring origins to visit.
"""
refreshed_statuses = refresh_save_origin_request_statuses()
scheduler = get_scheduler()
# then schedule the origins with meaningful status and type to be ingested
# regularly
lister = scheduler.get_or_create_lister(
name="save-code-now", instance_name=get_config()["instance_name"]
)
origins: Set[str, str] = set()
listed_origins = []
for status in refreshed_statuses:
visit_type = status["visit_type"]
# skip the "archives" visit type (not scheduled as a recurring origin)
if visit_type == "archives":
continue
# only keep satisfying visit statuses
if status["visit_status"] not in (VISIT_STATUS_PARTIAL, VISIT_STATUS_FULL):
continue
origin = status["origin_url"]
# drop duplicates within the same batch
if (visit_type, origin) in origins:
continue
origins.add((visit_type, origin))
listed_origins.append(
ListedOrigin(lister_id=lister.id, visit_type=visit_type, url=origin)
)
if listed_origins:
scheduler.record_listed_origins(listed_origins)
if len(refreshed_statuses) > 0:
msg = f"Successfully updated {len(refreshed_statuses)} save request(s)."
else:
msg = "Nothing to do."
self.stdout.write(self.style.SUCCESS(msg))
diff --git a/swh/web/common/migrations/0001_initial.py b/swh/web/save_code_now/migrations/0001_initial.py
similarity index 96%
rename from swh/web/common/migrations/0001_initial.py
rename to swh/web/save_code_now/migrations/0001_initial.py
index 30903eee..3963ce61 100644
--- a/swh/web/common/migrations/0001_initial.py
+++ b/swh/web/save_code_now/migrations/0001_initial.py
@@ -1,96 +1,98 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import unicode_literals
from django.db import migrations, models
_authorized_origins = [
"https://github.com/",
"https://gitlab.com/",
"https://bitbucket.org/",
"https://git.code.sf.net/",
"http://git.code.sf.net/",
"https://hg.code.sf.net/",
"http://hg.code.sf.net/",
"https://svn.code.sf.net/",
"http://svn.code.sf.net/",
]
def _populate_save_authorized_origins(apps, schema_editor):
- SaveAuthorizedOrigin = apps.get_model("swh_web_common", "SaveAuthorizedOrigin")
+ SaveAuthorizedOrigin = apps.get_model(
+ "swh_web_save_code_now", "SaveAuthorizedOrigin"
+ )
for origin_url in _authorized_origins:
SaveAuthorizedOrigin.objects.create(url=origin_url)
class Migration(migrations.Migration):
initial = True
operations = [
migrations.CreateModel(
name="SaveAuthorizedOrigin",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("url", models.CharField(max_length=200)),
],
options={
"db_table": "save_authorized_origin",
},
),
migrations.CreateModel(
name="SaveOriginRequest",
fields=[
("id", models.BigAutoField(primary_key=True, serialize=False)),
("request_date", models.DateTimeField(auto_now_add=True)),
("origin_type", models.CharField(max_length=200)),
("origin_url", models.CharField(max_length=200)),
(
"status",
models.TextField(
choices=[
("accepted", "accepted"),
("rejected", "rejected"),
("pending", "pending"),
],
default="pending",
),
),
("loading_task_id", models.IntegerField(default=-1)),
],
options={
"db_table": "save_origin_request",
"ordering": ["-id"],
},
),
migrations.CreateModel(
name="SaveUnauthorizedOrigin",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("url", models.CharField(max_length=200)),
],
options={
"db_table": "save_unauthorized_origin",
},
),
migrations.RunPython(_populate_save_authorized_origins),
]
diff --git a/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py b/swh/web/save_code_now/migrations/0002_saveoriginrequest_visit_date.py
similarity index 92%
rename from swh/web/common/migrations/0002_saveoriginrequest_visit_date.py
rename to swh/web/save_code_now/migrations/0002_saveoriginrequest_visit_date.py
index b2792f2e..e203ba7b 100644
--- a/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py
+++ b/swh/web/save_code_now/migrations/0002_saveoriginrequest_visit_date.py
@@ -1,23 +1,23 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0001_initial"),
+ ("swh_web_save_code_now", "0001_initial"),
]
operations = [
migrations.AddField(
model_name="saveoriginrequest",
name="visit_date",
field=models.DateTimeField(null=True),
),
]
diff --git a/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py b/swh/web/save_code_now/migrations/0003_saveoriginrequest_loading_task_status.py
similarity index 91%
rename from swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py
rename to swh/web/save_code_now/migrations/0003_saveoriginrequest_loading_task_status.py
index c539b675..98afdbd0 100644
--- a/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py
+++ b/swh/web/save_code_now/migrations/0003_saveoriginrequest_loading_task_status.py
@@ -1,52 +1,52 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import unicode_literals
from django.db import migrations, models
from swh.web.config import scheduler
def _remove_archived_tasks_with_no_saved_status(apps, schema_editor):
"""
Scheduler tasks are archived on a regular basis, so their completion
state can no longer be known: prior to this migration,
the loading task status was not stored in the database.
So remove the rows associated with already archived tasks, as
their loading status cannot be retrieved anymore.
"""
- SaveOriginRequest = apps.get_model("swh_web_common", "SaveOriginRequest")
+ SaveOriginRequest = apps.get_model("swh_web_save_code_now", "SaveOriginRequest")
no_saved_status_tasks = []
for sor in SaveOriginRequest.objects.all():
tasks = scheduler().get_tasks([sor.loading_task_id])
if not tasks:
no_saved_status_tasks.append(sor.loading_task_id)
SaveOriginRequest.objects.filter(loading_task_id__in=no_saved_status_tasks).delete()
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0002_saveoriginrequest_visit_date"),
+ ("swh_web_save_code_now", "0002_saveoriginrequest_visit_date"),
]
operations = [
migrations.AddField(
model_name="saveoriginrequest",
name="loading_task_status",
field=models.TextField(
choices=[
("not created", "not created"),
("not yet scheduled", "not yet scheduled"),
("scheduled", "scheduled"),
("succeed", "succeed"),
("failed", "failed"),
],
default="not created",
),
),
migrations.RunPython(_remove_archived_tasks_with_no_saved_status),
]
diff --git a/swh/web/common/migrations/0004_auto_20190204_1324.py b/swh/web/save_code_now/migrations/0004_auto_20190204_1324.py
similarity index 92%
rename from swh/web/common/migrations/0004_auto_20190204_1324.py
rename to swh/web/save_code_now/migrations/0004_auto_20190204_1324.py
index 2021a315..f77d460d 100644
--- a/swh/web/common/migrations/0004_auto_20190204_1324.py
+++ b/swh/web/save_code_now/migrations/0004_auto_20190204_1324.py
@@ -1,33 +1,33 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0003_saveoriginrequest_loading_task_status"),
+ ("swh_web_save_code_now", "0003_saveoriginrequest_loading_task_status"),
]
operations = [
migrations.AlterField(
model_name="saveoriginrequest",
name="loading_task_status",
field=models.TextField(
choices=[
("not created", "not created"),
("not yet scheduled", "not yet scheduled"),
("scheduled", "scheduled"),
("succeed", "succeed"),
("failed", "failed"),
("running", "running"),
],
default="not created",
),
),
]
diff --git a/swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py b/swh/web/save_code_now/migrations/0005_remove_duplicated_authorized_origins.py
similarity index 85%
rename from swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py
rename to swh/web/save_code_now/migrations/0005_remove_duplicated_authorized_origins.py
index 748c3f53..720f95e3 100644
--- a/swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py
+++ b/swh/web/save_code_now/migrations/0005_remove_duplicated_authorized_origins.py
@@ -1,25 +1,25 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import unicode_literals
from django.db import migrations
-from swh.web.common.models import SaveAuthorizedOrigin
+from swh.web.save_code_now.models import SaveAuthorizedOrigin
def _remove_duplicated_urls_in_authorized_list(apps, schema_editor):
sao = SaveAuthorizedOrigin.objects
for url in sao.values_list("url", flat=True).distinct():
sao.filter(pk__in=sao.filter(url=url).values_list("id", flat=True)[1:]).delete()
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0004_auto_20190204_1324"),
+ ("swh_web_save_code_now", "0004_auto_20190204_1324"),
]
operations = [migrations.RunPython(_remove_duplicated_urls_in_authorized_list)]
diff --git a/swh/web/common/migrations/0006_rename_origin_type.py b/swh/web/save_code_now/migrations/0006_rename_origin_type.py
similarity index 87%
rename from swh/web/common/migrations/0006_rename_origin_type.py
rename to swh/web/save_code_now/migrations/0006_rename_origin_type.py
index adbf4e6c..0770bf20 100644
--- a/swh/web/common/migrations/0006_rename_origin_type.py
+++ b/swh/web/save_code_now/migrations/0006_rename_origin_type.py
@@ -1,23 +1,23 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import unicode_literals
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0005_remove_duplicated_authorized_origins"),
+ ("swh_web_save_code_now", "0005_remove_duplicated_authorized_origins"),
]
operations = [
migrations.RenameField(
model_name="saveoriginrequest",
old_name="origin_type",
new_name="visit_type",
),
]
diff --git a/swh/web/common/migrations/0007_save_request_task_status_fix_typo.py b/swh/web/save_code_now/migrations/0007_save_request_task_status_fix_typo.py
similarity index 90%
rename from swh/web/common/migrations/0007_save_request_task_status_fix_typo.py
rename to swh/web/save_code_now/migrations/0007_save_request_task_status_fix_typo.py
index 78f2c792..9f86cf80 100644
--- a/swh/web/common/migrations/0007_save_request_task_status_fix_typo.py
+++ b/swh/web/save_code_now/migrations/0007_save_request_task_status_fix_typo.py
@@ -1,43 +1,43 @@
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.db import migrations, models
def _rename_request_status_from_succeed_to_succeeded(apps, schema_editor):
"""
Fix a typo in save request status value.
"""
- SaveOriginRequest = apps.get_model("swh_web_common", "SaveOriginRequest")
+ SaveOriginRequest = apps.get_model("swh_web_save_code_now", "SaveOriginRequest")
for sor in SaveOriginRequest.objects.all():
if sor.loading_task_status == "succeed":
sor.loading_task_status = "succeeded"
sor.save()
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0006_rename_origin_type"),
+ ("swh_web_save_code_now", "0006_rename_origin_type"),
]
operations = [
migrations.AlterField(
model_name="saveoriginrequest",
name="loading_task_status",
field=models.TextField(
choices=[
("not created", "not created"),
("not yet scheduled", "not yet scheduled"),
("scheduled", "scheduled"),
("succeeded", "succeeded"),
("failed", "failed"),
("running", "running"),
],
default="not created",
),
),
migrations.RunPython(_rename_request_status_from_succeed_to_succeeded),
]
diff --git a/swh/web/common/migrations/0008_save-code-now_indexes_20210106_1327.py b/swh/web/save_code_now/migrations/0008_save-code-now_indexes_20210106_1327.py
similarity index 91%
rename from swh/web/common/migrations/0008_save-code-now_indexes_20210106_1327.py
rename to swh/web/save_code_now/migrations/0008_save-code-now_indexes_20210106_1327.py
index dd7afbb3..badcd0c1 100644
--- a/swh/web/common/migrations/0008_save-code-now_indexes_20210106_1327.py
+++ b/swh/web/save_code_now/migrations/0008_save-code-now_indexes_20210106_1327.py
@@ -1,29 +1,29 @@
# Generated by Django 2.2.15 on 2021-01-06 13:27
# Adds indexes to the Save Code Now tables.
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0007_save_request_task_status_fix_typo"),
+ ("swh_web_save_code_now", "0007_save_request_task_status_fix_typo"),
]
operations = [
migrations.AddIndex(
model_name="saveauthorizedorigin",
index=models.Index(fields=["url"], name="save_author_url_3e4e9d_idx"),
),
migrations.AddIndex(
model_name="saveoriginrequest",
index=models.Index(
fields=["origin_url", "status"], name="save_origin_origin__b46350_idx"
),
),
migrations.AddIndex(
model_name="saveunauthorizedorigin",
index=models.Index(fields=["url"], name="save_unauth_url_c008fc_idx"),
),
]
diff --git a/swh/web/common/migrations/0009_saveoriginrequest_visit_status.py b/swh/web/save_code_now/migrations/0009_saveoriginrequest_visit_status.py
similarity index 92%
rename from swh/web/common/migrations/0009_saveoriginrequest_visit_status.py
rename to swh/web/save_code_now/migrations/0009_saveoriginrequest_visit_status.py
index a8adf6c1..476a0ad7 100644
--- a/swh/web/common/migrations/0009_saveoriginrequest_visit_status.py
+++ b/swh/web/save_code_now/migrations/0009_saveoriginrequest_visit_status.py
@@ -1,32 +1,32 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# Generated by Django 2.2.19 on 2021-04-19 16:38
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0008_save-code-now_indexes_20210106_1327"),
+ ("swh_web_save_code_now", "0008_save-code-now_indexes_20210106_1327"),
]
operations = [
migrations.AddField(
model_name="saveoriginrequest",
name="visit_status",
field=models.TextField(
choices=[
("created", "created"),
("ongoing", "ongoing"),
("full", "full"),
("partial", "partial"),
("not_found", "not_found"),
("failed", "failed"),
],
null=True,
),
),
]
diff --git a/swh/web/common/migrations/0010_saveoriginrequest_user_id.py b/swh/web/save_code_now/migrations/0010_saveoriginrequest_user_id.py
similarity index 89%
rename from swh/web/common/migrations/0010_saveoriginrequest_user_id.py
rename to swh/web/save_code_now/migrations/0010_saveoriginrequest_user_id.py
index d2ceceb9..ecd93bbe 100644
--- a/swh/web/common/migrations/0010_saveoriginrequest_user_id.py
+++ b/swh/web/save_code_now/migrations/0010_saveoriginrequest_user_id.py
@@ -1,22 +1,22 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# Generated by Django 2.2.20 on 2021-05-03 14:16
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0009_saveoriginrequest_visit_status"),
+ ("swh_web_save_code_now", "0009_saveoriginrequest_visit_status"),
]
operations = [
migrations.AddField(
model_name="saveoriginrequest",
name="user_id",
field=models.CharField(max_length=200, null=True),
),
]
diff --git a/swh/web/common/migrations/0011_saveoriginrequest_user_ids.py b/swh/web/save_code_now/migrations/0011_saveoriginrequest_user_ids.py
similarity index 90%
rename from swh/web/common/migrations/0011_saveoriginrequest_user_ids.py
rename to swh/web/save_code_now/migrations/0011_saveoriginrequest_user_ids.py
index 353c1790..2756cf95 100644
--- a/swh/web/common/migrations/0011_saveoriginrequest_user_ids.py
+++ b/swh/web/save_code_now/migrations/0011_saveoriginrequest_user_ids.py
@@ -1,25 +1,25 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0010_saveoriginrequest_user_id"),
+ ("swh_web_save_code_now", "0010_saveoriginrequest_user_id"),
]
operations = [
migrations.RemoveField(
model_name="saveoriginrequest",
name="user_id",
),
migrations.AddField(
model_name="saveoriginrequest",
name="user_ids",
field=models.TextField(null=True),
),
]
diff --git a/swh/web/common/migrations/0012_saveoriginrequest_note.py b/swh/web/save_code_now/migrations/0012_saveoriginrequest_note.py
similarity index 88%
rename from swh/web/common/migrations/0012_saveoriginrequest_note.py
rename to swh/web/save_code_now/migrations/0012_saveoriginrequest_note.py
index 6df1582f..582c5ed6 100644
--- a/swh/web/common/migrations/0012_saveoriginrequest_note.py
+++ b/swh/web/save_code_now/migrations/0012_saveoriginrequest_note.py
@@ -1,21 +1,21 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
- ("swh_web_common", "0011_saveoriginrequest_user_ids"),
+ ("swh_web_save_code_now", "0011_saveoriginrequest_user_ids"),
]
operations = [
migrations.AddField(
model_name="saveoriginrequest",
name="note",
field=models.TextField(null=True),
),
]
diff --git a/swh/web/common/migrations/__init__.py b/swh/web/save_code_now/migrations/__init__.py
similarity index 100%
rename from swh/web/common/migrations/__init__.py
rename to swh/web/save_code_now/migrations/__init__.py
diff --git a/swh/web/common/models.py b/swh/web/save_code_now/models.py
similarity index 95%
rename from swh/web/common/models.py
rename to swh/web/save_code_now/models.py
index fc2738aa..52c0ea09 100644
--- a/swh/web/common/models.py
+++ b/swh/web/save_code_now/models.py
@@ -1,135 +1,135 @@
-# Copyright (C) 2018-2021 The Software Heritage developers
+# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.db import models
from swh.web.common.typing import SaveOriginRequestInfo
class SaveAuthorizedOrigin(models.Model):
"""
Model table holding origin urls authorized to be loaded into the archive.
"""
url = models.CharField(max_length=200, null=False)
class Meta:
- app_label = "swh_web_common"
+ app_label = "swh_web_save_code_now"
db_table = "save_authorized_origin"
indexes = [models.Index(fields=["url"])]
def __str__(self):
return self.url
class SaveUnauthorizedOrigin(models.Model):
"""
Model table holding origin urls not authorized to be loaded into the
archive.
"""
url = models.CharField(max_length=200, null=False)
class Meta:
- app_label = "swh_web_common"
+ app_label = "swh_web_save_code_now"
db_table = "save_unauthorized_origin"
indexes = [models.Index(fields=["url"])]
def __str__(self):
return self.url
SAVE_REQUEST_ACCEPTED = "accepted"
SAVE_REQUEST_REJECTED = "rejected"
SAVE_REQUEST_PENDING = "pending"
SAVE_REQUEST_STATUS = [
(SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_ACCEPTED),
(SAVE_REQUEST_REJECTED, SAVE_REQUEST_REJECTED),
(SAVE_REQUEST_PENDING, SAVE_REQUEST_PENDING),
]
SAVE_TASK_NOT_CREATED = "not created"
SAVE_TASK_NOT_YET_SCHEDULED = "not yet scheduled"
SAVE_TASK_SCHEDULED = "scheduled"
SAVE_TASK_SUCCEEDED = "succeeded"
SAVE_TASK_FAILED = "failed"
SAVE_TASK_RUNNING = "running"
SAVE_TASK_STATUS = [
(SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_CREATED),
(SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED),
(SAVE_TASK_SCHEDULED, SAVE_TASK_SCHEDULED),
(SAVE_TASK_SUCCEEDED, SAVE_TASK_SUCCEEDED),
(SAVE_TASK_FAILED, SAVE_TASK_FAILED),
(SAVE_TASK_RUNNING, SAVE_TASK_RUNNING),
]
VISIT_STATUS_CREATED = "created"
VISIT_STATUS_ONGOING = "ongoing"
VISIT_STATUS_FULL = "full"
VISIT_STATUS_PARTIAL = "partial"
VISIT_STATUS_NOT_FOUND = "not_found"
VISIT_STATUS_FAILED = "failed"
VISIT_STATUSES = [
(VISIT_STATUS_CREATED, VISIT_STATUS_CREATED),
(VISIT_STATUS_ONGOING, VISIT_STATUS_ONGOING),
(VISIT_STATUS_FULL, VISIT_STATUS_FULL),
(VISIT_STATUS_PARTIAL, VISIT_STATUS_PARTIAL),
(VISIT_STATUS_NOT_FOUND, VISIT_STATUS_NOT_FOUND),
(VISIT_STATUS_FAILED, VISIT_STATUS_FAILED),
]
class SaveOriginRequest(models.Model):
"""
Model table holding all the save origin requests issued by users.
"""
id = models.BigAutoField(primary_key=True)
request_date = models.DateTimeField(auto_now_add=True)
visit_type = models.CharField(max_length=200, null=False)
visit_status = models.TextField(choices=VISIT_STATUSES, null=True)
origin_url = models.CharField(max_length=200, null=False)
status = models.TextField(choices=SAVE_REQUEST_STATUS, default=SAVE_REQUEST_PENDING)
loading_task_id = models.IntegerField(default=-1)
visit_date = models.DateTimeField(null=True)
loading_task_status = models.TextField(
choices=SAVE_TASK_STATUS, default=SAVE_TASK_NOT_CREATED
)
# store ids of users that submitted the request as string list
user_ids = models.TextField(null=True)
note = models.TextField(null=True)
class Meta:
- app_label = "swh_web_common"
+ app_label = "swh_web_save_code_now"
db_table = "save_origin_request"
ordering = ["-id"]
indexes = [models.Index(fields=["origin_url", "status"])]
def to_dict(self) -> SaveOriginRequestInfo:
"""Map the request save model object to a json serializable dict.
Returns:
The corresponding SaveOriginRequestInfo json serializable dict.
"""
visit_date = self.visit_date
return SaveOriginRequestInfo(
id=self.id,
origin_url=self.origin_url,
visit_type=self.visit_type,
save_request_date=self.request_date.isoformat(),
save_request_status=self.status,
save_task_status=self.loading_task_status,
visit_status=self.visit_status,
visit_date=visit_date.isoformat() if visit_date else None,
loading_task_id=self.loading_task_id,
note=self.note,
)
def __str__(self) -> str:
return str(self.to_dict())
diff --git a/swh/web/common/origin_save.py b/swh/web/save_code_now/origin_save.py
similarity index 99%
rename from swh/web/common/origin_save.py
rename to swh/web/save_code_now/origin_save.py
index da88b144..9fffd110 100644
--- a/swh/web/common/origin_save.py
+++ b/swh/web/save_code_now/origin_save.py
@@ -1,942 +1,942 @@
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timedelta, timezone
from functools import lru_cache
from itertools import product
import json
import logging
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlparse
from prometheus_client import Gauge
import requests
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.core.validators import URLValidator
from django.db.models import Q, QuerySet
from django.utils.html import escape
from swh.scheduler.utils import create_oneshot_task_dict
from swh.web.common import archive
from swh.web.common.exc import (
BadInputExc,
ForbiddenExc,
NotFoundExc,
sentry_capture_exception,
)
-from swh.web.common.models import (
+from swh.web.common.typing import OriginExistenceCheckInfo, SaveOriginRequestInfo
+from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc
+from swh.web.config import get_config, scheduler
+from swh.web.save_code_now.models import (
SAVE_REQUEST_ACCEPTED,
SAVE_REQUEST_PENDING,
SAVE_REQUEST_REJECTED,
SAVE_TASK_FAILED,
SAVE_TASK_NOT_CREATED,
SAVE_TASK_NOT_YET_SCHEDULED,
SAVE_TASK_RUNNING,
SAVE_TASK_SCHEDULED,
SAVE_TASK_SUCCEEDED,
VISIT_STATUS_CREATED,
VISIT_STATUS_ONGOING,
SaveAuthorizedOrigin,
SaveOriginRequest,
SaveUnauthorizedOrigin,
)
-from swh.web.common.typing import OriginExistenceCheckInfo, SaveOriginRequestInfo
-from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc
-from swh.web.config import get_config, scheduler
logger = logging.getLogger(__name__)
# Number of days in the past to look up information
MAX_THRESHOLD_DAYS = 30
# Non-terminal visit statuses which need updates
NON_TERMINAL_STATUSES = [
VISIT_STATUS_CREATED,
VISIT_STATUS_ONGOING,
]
def get_origin_save_authorized_urls() -> List[str]:
    """
    Collect the origin url prefixes whose save requests are accepted
    right away, without manual review (whitelist).

    Returns:
        list: the url prefix of every stored authorized origin
    """
    authorized_prefixes = []
    for authorized_origin in SaveAuthorizedOrigin.objects.all():
        authorized_prefixes.append(authorized_origin.url)
    return authorized_prefixes
def get_origin_save_unauthorized_urls() -> List[str]:
    """
    Collect the origin url prefixes whose save requests are always
    rejected (blacklist).

    Returns:
        list: the url prefix of every stored unauthorized origin
    """
    unauthorized_prefixes = []
    for unauthorized_origin in SaveUnauthorizedOrigin.objects.all():
        unauthorized_prefixes.append(unauthorized_origin.url)
    return unauthorized_prefixes
def can_save_origin(origin_url: str, bypass_pending_review: bool = False) -> str:
    """
    Decide what to do with a save request targeting the given origin url.

    The decision is taken in that order:

        * the request is rejected when the url starts with a blacklisted prefix
        * the request is accepted when the url starts with a whitelisted prefix
        * the request is accepted, and the url recorded as an authorized one,
          when ``bypass_pending_review`` is set
        * the request is put in pending state for manual review otherwise

    Args:
        origin_url (str): the software origin url to check
        bypass_pending_review (bool): whether the manual review step must be
            skipped for an url that is neither blacklisted nor whitelisted

    Returns:
        str: the origin save request status, either **accepted**,
        **rejected** or **pending**
    """
    # the blacklist is checked first, it takes precedence over everything else
    if any(
        origin_url.startswith(prefix) for prefix in get_origin_save_unauthorized_urls()
    ):
        return SAVE_REQUEST_REJECTED

    # a whitelisted url can be saved immediately
    if any(
        origin_url.startswith(prefix) for prefix in get_origin_save_authorized_urls()
    ):
        return SAVE_REQUEST_ACCEPTED

    # unknown url: it must be manually reviewed unless the submitter is
    # allowed to bypass the review
    if not bypass_pending_review:
        return SAVE_REQUEST_PENDING

    # remember the url as a trusted one for the next submissions
    SaveAuthorizedOrigin.objects.get_or_create(url=origin_url)
    return SAVE_REQUEST_ACCEPTED
# scheduler task type to use for each visit type any user can request
# TODO: do not hardcode the task name here (T1157)
_visit_type_task = dict(
    git="load-git",
    hg="load-hg",
    svn="load-svn",
    cvs="load-cvs",
    bzr="load-bzr",
)

# scheduler task type for the visit types reserved to privileged users
_visit_type_task_privileged = dict(archives="load-archive-files")

# origin save task status matching each scheduler task status
_save_task_status = dict(
    next_run_not_scheduled=SAVE_TASK_NOT_YET_SCHEDULED,
    next_run_scheduled=SAVE_TASK_SCHEDULED,
    completed=SAVE_TASK_SUCCEEDED,
    disabled=SAVE_TASK_FAILED,
)

# origin save task status matching each scheduler task_run status
_save_task_run_status = {
    "scheduled": SAVE_TASK_SCHEDULED,
    "started": SAVE_TASK_RUNNING,
    # a run is a success whether or not it brought something new
    **dict.fromkeys(("eventful", "uneventful"), SAVE_TASK_SUCCEEDED),
    # every flavor of failure ends up in the same save task status
    **dict.fromkeys(("failed", "permfailed", "lost"), SAVE_TASK_FAILED),
}
@lru_cache()
def get_scheduler_load_task_types() -> List[str]:
    """Return the names of the scheduler task types starting with ``load``.

    The returned list is memoized through ``lru_cache``.
    """
    load_task_types = []
    for task_type in scheduler().get_task_types():
        type_name = task_type["type"]
        if type_name.startswith("load"):
            load_task_types.append(type_name)
    return load_task_types
def get_savable_visit_types_dict(privileged_user: bool = False) -> Dict:
    """Return the visit types the user can submit save requests for, mapped
    to the scheduler task type loading them.

    Args:
        privileged_user: Whether the visit types reserved to privileged users
            must be included. Default to False to only get the unprivileged ones.

    Returns:
        the dict of supported visit types for the user, restricted to the load
        task types known by the scheduler when that list can be retrieved

    """
    candidates = _visit_type_task
    if privileged_user:
        candidates = {**_visit_type_task, **_visit_type_task_privileged}

    try:
        # only keep the visit types the scheduler has a load task type for
        known_load_tasks = get_scheduler_load_task_types()
        savable = {}
        for visit_type, task_type in candidates.items():
            if task_type in known_load_tasks:
                savable[visit_type] = task_type
        return savable
    except Exception:
        # scheduler task types could not be retrieved, do not filter
        return candidates
def get_savable_visit_types(privileged_user: bool = False) -> List[str]:
    """Return the sorted names of the visit types the user can submit
    save requests for.

    Args:
        privileged_user: Whether the visit types reserved to privileged users
            must be included. Default to False to only get the unprivileged ones.

    Returns:
        the sorted list of savable visit types

    """
    savable_visit_types = get_savable_visit_types_dict(privileged_user)
    # iterating a dict yields its keys
    return sorted(savable_visit_types)
def _check_visit_type_savable(visit_type: str, privileged_user: bool = False) -> None:
    """Ensure save requests can be submitted for the given visit type.

    Args:
        visit_type: the visit type to check
        privileged_user: Whether the visit types reserved to privileged users
            are allowed

    Raises:
        BadInputExc: the visit type is not one of the savable ones

    """
    savable_visit_types = get_savable_visit_types(privileged_user)
    if visit_type in savable_visit_types:
        return

    allowed_visit_types = ", ".join(savable_visit_types)
    raise BadInputExc(
        f"Visit of type {visit_type} can not be saved! "
        f"Allowed types are the following: {allowed_visit_types}"
    )
# url validator restricted to the schemes accepted for origin urls
_validate_url = URLValidator(
    schemes=[
        "http",
        "https",
        "svn",
        "git",
        "rsync",
        "pserver",
        "ssh",
        "bzr",
    ]
)


def _check_origin_url_valid(origin_url: str) -> None:
    """Ensure an origin url is well formed and does not embed a password.

    Args:
        origin_url: the origin url to check

    Raises:
        BadInputExc: the url is rejected by the validator, or it contains a
            password other than an empty one or ``anonymous``

    """
    try:
        _validate_url(origin_url)
    except ValidationError:
        raise BadInputExc(
            f"The provided origin url ({escape(origin_url)}) is not valid!"
        )

    url_password = urlparse(origin_url).password
    if url_password is None or url_password in ("", "anonymous"):
        # no credentials worth protecting in the url
        return

    raise BadInputExc(
        "The provided origin url contains a password and cannot be "
        "accepted for security reasons."
    )
# Maximum number of seconds to wait for the remote server when checking an url,
# same value as the one used for the other outgoing request of this module
_ORIGIN_EXISTS_TIMEOUT = 30


def origin_exists(origin_url: str) -> OriginExistenceCheckInfo:
    """Check the origin url for existence. If it exists, extract some more useful
    information on the origin.

    A HEAD request following redirections is sent to the url, which is
    considered as existing when the response status is a successful one.

    Args:
        origin_url: the url to check

    Returns:
        A dict with the following keys:

            * **origin_url**: the checked url
            * **exists**: whether the HEAD request succeeded
            * **last_modified**: ISO 8601 date parsed from the ``Last-Modified``
              header (or its ``X-Archive-Orig-`` counterpart), None when the
              header is missing or not parsable
            * **content_length**: value of the ``Content-Length`` header (or its
              ``X-Archive-Orig-`` counterpart) as an integer, None when the
              header is missing or not parsable

    Raises:
        requests.exceptions.RequestException: the request could not be completed
            (including when the server does not answer in time)

    """
    # a timeout is mandatory here: without it, a server that never answers
    # would block the caller forever
    resp = requests.head(
        origin_url, allow_redirects=True, timeout=_ORIGIN_EXISTS_TIMEOUT
    )
    exists = resp.ok
    content_length: Optional[int] = None
    last_modified: Optional[str] = None
    if exists:
        # Also process X-Archive-Orig-* headers in case the URL targets the
        # Internet Archive.
        size_ = resp.headers.get(
            "Content-Length", resp.headers.get("X-Archive-Orig-Content-Length")
        )
        try:
            content_length = int(size_) if size_ else None
        except ValueError:
            # headers are controlled by the remote server, a malformed value
            # must not make the whole check fail
            content_length = None
        try:
            date_str = resp.headers.get(
                "Last-Modified", resp.headers.get("X-Archive-Orig-Last-Modified", "")
            )
            date = datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %Z")
            last_modified = date.isoformat()
        except ValueError:
            # if not provided or not parsable as per the expected format, keep it None
            pass
    return OriginExistenceCheckInfo(
        origin_url=origin_url,
        exists=exists,
        last_modified=last_modified,
        content_length=content_length,
    )
def _check_origin_exists(url: str) -> OriginExistenceCheckInfo:
    """Return the existence check info of an URL, raise an explicit
    error when the URL does not exist.

    Raises:
        BadInputExc: the existence check reported the URL as not existing

    """
    existence_info = origin_exists(url)
    if existence_info["exists"]:
        return existence_info
    raise BadInputExc(f"The provided url ({escape(url)}) does not exist!")
def _get_visit_info_for_save_request(
    save_request: SaveOriginRequest,
) -> Tuple[Optional[datetime], Optional[str]]:
    """Look up the archive for the visit associated to a save request.

    Args:
        save_request: the save origin request to find the visit of

    Returns:
        Tuple of (visit date, visit status), both None when the request is too
        old to be looked up or when no visit was found

    """
    request_age = datetime.now(tz=timezone.utc) - save_request.request_date
    if request_age.days > MAX_THRESHOLD_DAYS:
        # requests to storage are expensive, give up on requests submitted
        # more than a month ago: their loading task surely ended up with errors
        return None, None

    origin_visit = archive.origin_visit_find_by_date(
        save_request.origin_url, save_request.request_date
    )
    if not origin_visit:
        return None, None

    return parse_iso8601_date_to_utc(origin_visit["date"]), origin_visit["status"]
def _check_visit_update_status(
    save_request: SaveOriginRequest,
) -> Tuple[Optional[datetime], Optional[str], Optional[str]]:
    """Deduce the loading task status of a save request from the visit
    found in the archive for it.

    Args:
        save_request: the save origin request to inspect

    Returns:
        Tuple of (optional visit date, optional visit status, optional save task
        status); the save task status is None when it cannot be deduced yet

    """
    visit_date, visit_status = _get_visit_info_for_save_request(save_request)

    if visit_date and visit_status in ("full", "partial"):
        # the visit has been performed, the saving task succeeded
        return visit_date, visit_status, SAVE_TASK_SUCCEEDED

    if visit_status in ("created", "ongoing"):
        # the visit is currently running
        return visit_date, visit_status, SAVE_TASK_RUNNING

    if visit_status in ("not_found", "failed"):
        return visit_date, visit_status, SAVE_TASK_FAILED

    # nothing conclusive from the visit: the task is considered as failed
    # when the request was submitted more than 30 days ago
    request_age = datetime.now(tz=timezone.utc) - save_request.request_date
    if request_age.days > MAX_THRESHOLD_DAYS:
        return visit_date, visit_status, SAVE_TASK_FAILED

    return visit_date, visit_status, None
def _compute_task_loading_status(
    task: Optional[Dict[str, Any]] = None,
    task_run: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Translate scheduler information into an origin save task status.

    Args:
        task: scheduler task information, if any
        task_run: scheduler task run information, if any; when provided,
            its status takes precedence over the task one

    Returns:
        The save task status, None when neither a task nor a task run is given

    """
    # the task status is only a first guess ...
    status: Optional[str] = _save_task_status[task["status"]] if task else None
    # ... the status of the task run is more accurate when available
    if task_run:
        status = _save_task_run_status[task_run["status"]]
    return status
def _update_save_request_info(
    save_request: SaveOriginRequest,
    task: Optional[Dict[str, Any]] = None,
    task_run: Optional[Dict[str, Any]] = None,
) -> SaveOriginRequestInfo:
    """Refresh a save request from its visit status, relying on the task and
    task_run information when the visit does not tell the loading task status.

    A save request whose visit date is set and whose visit status is a final
    one is returned as is.

    Args:
        save_request: Save request
        task: Associated scheduler task information about the save request
        task_run: Most recent run occurrence of the associated task

    Returns:
        Summary of the save request information, updated when needed.

    """
    # The final status of a save code now request is known once its visit date
    # is set and its visit status is a terminal one: nothing to refresh then.
    is_final = (
        save_request.visit_date
        and save_request.visit_status
        and save_request.visit_status not in NON_TERMINAL_STATUSES
    )
    if is_final:
        return save_request.to_dict()

    visit_date, visit_status, loading_task_status = _check_visit_update_status(
        save_request
    )
    if not loading_task_status:
        # the visit was not conclusive, rely on scheduler information
        loading_task_status = _compute_task_loading_status(task, task_run)

    refreshed_fields = {"visit_date": visit_date, "visit_status": visit_status}
    if loading_task_status is not None:
        # an unknown loading task status never overwrites the stored one
        refreshed_fields["loading_task_status"] = loading_task_status

    must_save = False
    for field_name, refreshed_value in refreshed_fields.items():
        if refreshed_value != getattr(save_request, field_name):
            setattr(save_request, field_name, refreshed_value)
            must_save = True

    # only hit the database when something changed
    if must_save:
        save_request.save()

    return save_request.to_dict()
def create_save_origin_request(
    visit_type: str,
    origin_url: str,
    privileged_user: bool = False,
    user_id: Optional[int] = None,
    **kwargs,
) -> SaveOriginRequestInfo:
    """Create a loading task to save a software origin into the archive.

    This function aims to create a software origin loading task through the use of the
    swh-scheduler component.

    First, some checks are performed to see if the visit type and origin url are valid
    but also if the save request can be accepted. For the 'archives' visit type,
    this also ensures the artifacts actually exist. If those checks passed, the loading
    task is then created. Otherwise, the save request is put in pending or rejected
    state.

    All the submitted save requests are logged into the swh-web database to keep track
    of them.

    Args:
        visit_type: the type of visit to perform (e.g. git, hg, svn, archives, ...)
        origin_url: the url of the origin to save
        privileged_user: Whether the user has some more privilege than other (bypass
            review, access to privileged other visit types)
        user_id: User identifier (provided when authenticated)
        kwargs: Optional parameters; for the 'archives' visit type, ``archives_data``
            is required: a list of dicts with ``artifact_url`` and
            ``artifact_version`` keys

    Raises:
        BadInputExc: the visit type or origin url is invalid or inexistent
        ForbiddenExc: the provided origin url is blacklisted

    Returns:
        dict: A dict describing the save request with the following keys:

            * **visit_type**: the type of visit to perform
            * **origin_url**: the url of the origin
            * **save_request_date**: the date the request was submitted
            * **save_request_status**: the request status, either **accepted**,
              **rejected** or **pending**
            * **save_task_status**: the origin loading task status, either
              **not created**, **not yet scheduled**, **scheduled**,
              **succeed** or **failed**

    """
    visit_type_tasks = get_savable_visit_types_dict(privileged_user)
    _check_visit_type_savable(visit_type, privileged_user)
    _check_origin_url_valid(origin_url)

    # if all checks passed so far, we can try and save the origin
    save_request_status = can_save_origin(origin_url, privileged_user)
    task = None

    # if the origin save request is accepted, create a scheduler
    # task to load it into the archive
    if save_request_status == SAVE_REQUEST_ACCEPTED:
        # create a task with high priority
        task_kwargs: Dict[str, Any] = {
            "priority": "high",
            "url": origin_url,
        }
        if visit_type == "archives":
            # extra arguments for that type are required
            archives_data = kwargs.get("archives_data", [])
            if not archives_data:
                raise BadInputExc(
                    "Artifacts data are missing for the archives visit type."
                )
            artifacts = []
            for artifact in archives_data:
                artifact_url = artifact.get("artifact_url")
                artifact_version = artifact.get("artifact_version")
                if not artifact_url or not artifact_version:
                    raise BadInputExc("Missing url or version for an artifact to load.")
                # every artifact must be reachable before scheduling its loading
                metadata = _check_origin_exists(artifact_url)
                artifacts.append(
                    {
                        "url": artifact_url,
                        "version": artifact_version,
                        "time": metadata["last_modified"],
                        "length": metadata["content_length"],
                    }
                )
            task_kwargs = dict(**task_kwargs, artifacts=artifacts, snapshot_append=True)

        sor = None
        # get list of previously submitted save requests (most recent first)
        current_sors = list(
            SaveOriginRequest.objects.filter(
                visit_type=visit_type, origin_url=origin_url
            ).order_by("-request_date")
        )

        can_create_task = False
        # if no save requests previously submitted, create the scheduler task
        if not current_sors:
            can_create_task = True
        else:
            # get the latest submitted save request
            sor = current_sors[0]
            # if it was in pending state, we need to create the scheduler task
            # and update the save request info in the database
            if sor.status == SAVE_REQUEST_PENDING:
                can_create_task = True
            # a task has already been created to load the origin
            elif sor.loading_task_id != -1:
                # get the scheduler task and its status
                tasks = scheduler().get_tasks([sor.loading_task_id])
                task = tasks[0] if tasks else None
                task_runs = scheduler().get_task_runs([sor.loading_task_id])
                task_run = task_runs[0] if task_runs else None
                save_request_info = _update_save_request_info(sor, task, task_run)
                task_status = save_request_info["save_task_status"]
                # create a new scheduler task only if the previous one has been
                # already executed
                if (
                    task_status == SAVE_TASK_FAILED
                    or task_status == SAVE_TASK_SUCCEEDED
                ):
                    can_create_task = True
                    # forget the previous request, a new one will be created
                    sor = None
                else:
                    can_create_task = False

        if can_create_task:
            # effectively create the scheduler task
            task_dict = create_oneshot_task_dict(
                visit_type_tasks[visit_type], **task_kwargs
            )

            task = scheduler().create_tasks([task_dict])[0]

            # pending save request has been accepted
            if sor:
                sor.status = SAVE_REQUEST_ACCEPTED
                sor.loading_task_id = task["id"]
                sor.save()
            else:
                sor = SaveOriginRequest.objects.create(
                    visit_type=visit_type,
                    origin_url=origin_url,
                    status=save_request_status,
                    loading_task_id=task["id"],
                    user_ids=f'"{user_id}"' if user_id else None,
                )
    # save request must be manually reviewed for acceptation
    elif save_request_status == SAVE_REQUEST_PENDING:
        # check if there is already such a save request already submitted,
        # no need to add it to the database in that case
        try:
            sor = SaveOriginRequest.objects.get(
                visit_type=visit_type, origin_url=origin_url, status=save_request_status
            )
            user_ids = sor.user_ids if sor.user_ids is not None else ""
            quoted_user_id = f'"{user_id}"'
            if user_id is not None and quoted_user_id not in user_ids:
                # update user ids list; the request may have been submitted
                # anonymously first, in which case there is no list to extend
                # (and the string "None" must not end up in it)
                sor.user_ids = (
                    f"{user_ids},{quoted_user_id}" if user_ids else quoted_user_id
                )
                sor.save()
        # if not add it to the database
        except ObjectDoesNotExist:
            sor = SaveOriginRequest.objects.create(
                visit_type=visit_type,
                origin_url=origin_url,
                status=save_request_status,
                user_ids=f'"{user_id}"' if user_id else None,
            )
    # origin can not be saved as its url is blacklisted,
    # log the request to the database anyway
    else:
        sor = SaveOriginRequest.objects.create(
            visit_type=visit_type,
            origin_url=origin_url,
            status=save_request_status,
            user_ids=f'"{user_id}"' if user_id else None,
        )

    if save_request_status == SAVE_REQUEST_REJECTED:
        raise ForbiddenExc(
            (
                'The "save code now" request has been rejected '
                "because the provided origin url is blacklisted."
            )
        )

    assert sor is not None
    return _update_save_request_info(sor, task)
def update_save_origin_requests_from_queryset(
    requests_queryset: QuerySet,
) -> List[SaveOriginRequestInfo]:
    """Refresh every save request of a SaveOriginRequest queryset, store the
    changes in db and return the refreshed save requests.

    Args:
        requests_queryset: input SaveOriginRequest queryset

    Returns:
        list: A list of save origin request info dicts as described in
        :func:`swh.web.save_code_now.origin_save.create_save_origin_request`
    """
    task_ids = [sor.loading_task_id for sor in requests_queryset]
    if not task_ids:
        # empty queryset, nothing to refresh
        return []

    try:
        tasks = {task["id"]: task for task in scheduler().get_tasks(task_ids)}
        task_runs = {
            task_run["task"]: task_run
            for task_run in scheduler().get_task_runs(tasks)
        }
    except Exception:
        # allow to avoid mocking api GET responses for /origin/save endpoint when
        # running cypress tests as scheduler is not available
        tasks = {}
        task_runs = {}

    return [
        _update_save_request_info(
            sor,
            tasks.get(sor.loading_task_id),
            task_runs.get(sor.loading_task_id),
        )
        for sor in requests_queryset
    ]
def refresh_save_origin_request_statuses() -> List[SaveOriginRequestInfo]:
    """Refresh non-terminal save origin requests (SOR) in the backend.

    The refreshed SOR are the **accepted** ones, submitted at most
    ``MAX_THRESHOLD_DAYS`` days ago, whose visit date or visit status is
    missing, or whose visit status is a non-terminal one.

    Their information is updated in db, then returned.

    """
    pivot_date = datetime.now(tz=timezone.utc) - timedelta(days=MAX_THRESHOLD_DAYS)

    # accepted requests only, whatever their loading task status
    is_accepted = Q(status=SAVE_REQUEST_ACCEPTED)
    # those without the required information we need to update
    lacks_final_visit = (
        Q(visit_date__isnull=True)
        | Q(visit_status__isnull=True)
        | Q(visit_status__in=NON_TERMINAL_STATUSES)
    )
    # limit results to recent ones (that is roughly 30 days old at best)
    is_recent = Q(request_date__gte=pivot_date)

    save_requests = SaveOriginRequest.objects.filter(
        is_accepted, lacks_final_visit, is_recent
    )

    if save_requests.count() > 0:
        return update_save_origin_requests_from_queryset(save_requests)
    return []
def get_save_origin_requests(
    visit_type: str, origin_url: str
) -> List[SaveOriginRequestInfo]:
    """
    Retrieve, after having refreshed them, all the save requests
    submitted for a given software origin.

    Args:
        visit_type: the type of visit
        origin_url: the url of the origin

    Raises:
        BadInputExc: the visit type or origin url is invalid
        swh.web.common.exc.NotFoundExc: no save requests can be found for the
            given origin

    Returns:
        list: A list of save origin requests dict as described in
        :func:`swh.web.save_code_now.origin_save.create_save_origin_request`
    """
    _check_visit_type_savable(visit_type)
    _check_origin_url_valid(origin_url)

    matching_requests = SaveOriginRequest.objects.filter(
        visit_type=visit_type, origin_url=origin_url
    )
    if matching_requests.count() != 0:
        return update_save_origin_requests_from_queryset(matching_requests)

    raise NotFoundExc(
        f"No save requests found for visit of type {visit_type} "
        f"on origin with url {origin_url}."
    )
def get_save_origin_task_info(
    save_request_id: int, full_info: bool = True
) -> Dict[str, Any]:
    """
    Get detailed information about an accepted save origin request
    and its associated loading task.

    If the save request does not exist, an empty dictionary is returned.
    If the associated loading task, or its latest run, can not be retrieved
    from the scheduler, the returned dictionary only contains the **note**
    of the save request, when it has one.

    Args:
        save_request_id: identifier of a save origin request
        full_info: whether to return detailed info for staff users

    Returns:
        A dictionary with the following keys:

            - **type**: loading task type
            - **arguments**: loading task arguments
            - **id**: loading task database identifier
            - **backend_id**: loading task celery identifier
            - **scheduled**: loading task scheduling date
            - **ended**: loading task termination date
            - **status**: loading task execution status
            - **visit_status**: Actual visit status

        Depending on the availability of the task logs in the elasticsearch
        cluster of Software Heritage, the returned dictionary may also
        contain the following keys:

            - **name**: associated celery task name
            - **message**: relevant log message from task execution
            - **duration**: task execution time (only if it succeeded)
            - **worker**: name of the worker that executed the task

        When ``full_info`` is False, the **id**, **backend_id** and **worker**
        keys are removed and the traceback of a loading failure message is
        stripped.
    """
    try:
        save_request = SaveOriginRequest.objects.get(id=save_request_id)
    except ObjectDoesNotExist:
        return {}

    task_info: Dict[str, Any] = {}
    if save_request.note is not None:
        task_info["note"] = save_request.note

    try:
        task = scheduler().get_tasks([save_request.loading_task_id])
    except Exception:
        # to avoid mocking GET responses of /save/task/info/ endpoint when running
        # cypress tests as scheduler is not available in that case
        task = None

    task = task[0] if task else None
    if task is None:
        return task_info

    task_run = scheduler().get_task_runs([task["id"]])
    task_run = task_run[0] if task_run else None
    if task_run is None:
        return task_info

    task_info.update(task_run)
    task_info["type"] = task["type"]
    task_info["arguments"] = task["arguments"]
    # expose the task identifier as id, not the task run one
    task_info["id"] = task_run["task"]
    del task_info["task"]
    del task_info["metadata"]

    # Enrich the task info with the loading visit status
    task_info["visit_status"] = save_request.visit_status

    es_workers_index_url = get_config()["es_workers_index_url"]
    if not es_workers_index_url:
        return task_info
    es_workers_index_url += "/_search"

    # time window in which the task logs are searched
    if save_request.visit_date:
        min_ts = save_request.visit_date
        max_ts = min_ts + timedelta(days=7)
    else:
        min_ts = save_request.request_date
        max_ts = min_ts + timedelta(days=MAX_THRESHOLD_DAYS)
    min_ts_unix = int(min_ts.timestamp()) * 1000
    max_ts_unix = int(max_ts.timestamp()) * 1000

    save_task_status = _save_task_status[task["status"]]
    # syslog priority of the log entries to search for
    priority = "3" if save_task_status == SAVE_TASK_FAILED else "6"

    query = {
        "bool": {
            "must": [
                {"match_phrase": {"syslog.priority": {"query": priority}}},
                {
                    "match_phrase": {
                        "journald.custom.swh_task_id": {"query": task_run["backend_id"]}
                    }
                },
                {
                    "range": {
                        "@timestamp": {
                            "gte": min_ts_unix,
                            "lte": max_ts_unix,
                            "format": "epoch_millis",
                        }
                    }
                },
            ]
        }
    }

    try:
        response = requests.post(
            es_workers_index_url,
            json={"query": query, "sort": ["@timestamp"]},
            timeout=30,
        )
        results = json.loads(response.text)
        if results["hits"]["total"]["value"] >= 1:
            # results are sorted by timestamp, pick the most recent log entry
            task_run_info = results["hits"]["hits"][-1]["_source"]
            journald_custom = task_run_info.get("journald", {}).get("custom", {})
            task_info["duration"] = journald_custom.get(
                "swh_logging_args_runtime", "not available"
            )
            task_info["message"] = task_run_info.get("message", "not available")
            task_info["name"] = journald_custom.get("swh_task_name", "not available")
            task_info["worker"] = task_run_info.get("host", {}).get("hostname")
    except Exception as exc:
        # logs are a best-effort enrichment, report the error and carry on
        logger.warning("Request to Elasticsearch failed\n%s", exc)
        sentry_capture_exception(exc)

    if not full_info:
        for field in ("id", "backend_id", "worker"):
            # remove some staff only fields
            task_info.pop(field, None)

        # The message to sanitize is the one returned to the user, that is the
        # one stored in task_info (it comes from the task logs), so this is the
        # one that must be inspected.
        message = task_info.get("message")
        if message and "Loading failure" in message:
            # hide traceback for non staff users, only display exception
            message_lines = message.split("\n")
            kept_lines = []
            traceback_found = False
            for line in message_lines:
                if line.startswith("Traceback"):
                    traceback_found = True
                    break
                kept_lines.append(line)
            if traceback_found:
                # the last line of the traceback is the exception itself;
                # without traceback the message is left untouched, otherwise
                # its last line would be duplicated
                kept_lines.append(message_lines[-1])
                task_info["message"] = "\n".join(kept_lines)

    return task_info
# Names of the Prometheus metrics related to origin save requests
SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests"
ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests"
ACCEPTED_SAVE_REQUESTS_DELAY_METRIC = "swh_web_save_requests_delay_seconds"

# Count of save requests per request status and visit type
_submitted_save_requests_gauge = Gauge(
    SUBMITTED_SAVE_REQUESTS_METRIC,
    "Number of submitted origin save requests",
    labelnames=["status", "visit_type"],
    registry=SWH_WEB_METRICS_REGISTRY,
)

# Count of accepted save requests per loading task status and visit type
_accepted_save_requests_gauge = Gauge(
    ACCEPTED_SAVE_REQUESTS_METRIC,
    "Number of accepted origin save requests",
    labelnames=["load_task_status", "visit_type"],
    registry=SWH_WEB_METRICS_REGISTRY,
)

# Delay of save code now requests per loading task status and visit type, that
# is the time elapsed between the submission of a request and the visit of
# its origin
_accepted_save_requests_delay_gauge = Gauge(
    ACCEPTED_SAVE_REQUESTS_DELAY_METRIC,
    "Save Requests Duration",
    labelnames=["load_task_status", "visit_type"],
    registry=SWH_WEB_METRICS_REGISTRY,
)
def compute_save_requests_metrics() -> None:
    """Compute Prometheus metrics related to origin save requests:

    - Number of submitted origin save requests
    - Number of accepted origin save requests
    - Save Code Now requests delay between request time and actual time of ingestion

    Every gauge is first reset to zero for all its known label combinations,
    then all the save requests stored in database are walked through.

    """
    # for metrics, we want access to all visit types
    visit_types = get_savable_visit_types(privileged_user=True)
    terminal_task_statuses = (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED)

    # reset the gauges
    for request_status in (
        SAVE_REQUEST_ACCEPTED,
        SAVE_REQUEST_REJECTED,
        SAVE_REQUEST_PENDING,
    ):
        for visit_type in visit_types:
            _submitted_save_requests_gauge.labels(request_status, visit_type).set(0)

    for load_task_status in (
        SAVE_TASK_NOT_CREATED,
        SAVE_TASK_NOT_YET_SCHEDULED,
        SAVE_TASK_SCHEDULED,
        SAVE_TASK_SUCCEEDED,
        SAVE_TASK_FAILED,
        SAVE_TASK_RUNNING,
    ):
        for visit_type in visit_types:
            _accepted_save_requests_gauge.labels(load_task_status, visit_type).set(0)

    for load_task_status in terminal_task_statuses:
        for visit_type in visit_types:
            _accepted_save_requests_delay_gauge.labels(
                load_task_status, visit_type
            ).set(0)

    # feed the gauges with the stored save requests
    for sor in SaveOriginRequest.objects.all():
        if sor.status == SAVE_REQUEST_ACCEPTED:
            accepted_gauge = _accepted_save_requests_gauge.labels(
                load_task_status=sor.loading_task_status,
                visit_type=sor.visit_type,
            )
            accepted_gauge.inc()

        # every request counts as a submitted one, whatever its status
        submitted_gauge = _submitted_save_requests_gauge.labels(
            status=sor.status, visit_type=sor.visit_type
        )
        submitted_gauge.inc()

        has_delay = (
            sor.loading_task_status in (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED)
            and sor.visit_date is not None
            and sor.request_date is not None
        )
        if has_delay:
            delay = sor.visit_date.timestamp() - sor.request_date.timestamp()
            delay_gauge = _accepted_save_requests_delay_gauge.labels(
                load_task_status=sor.loading_task_status,
                visit_type=sor.visit_type,
            )
            delay_gauge.inc(delay)
diff --git a/swh/web/templates/admin/origin-save/common.html b/swh/web/save_code_now/templates/admin/origin-save-common.html
similarity index 100%
rename from swh/web/templates/admin/origin-save/common.html
rename to swh/web/save_code_now/templates/admin/origin-save-common.html
diff --git a/swh/web/templates/admin/origin-save/filters.html b/swh/web/save_code_now/templates/admin/origin-save-filters.html
similarity index 98%
rename from swh/web/templates/admin/origin-save/filters.html
rename to swh/web/save_code_now/templates/admin/origin-save-filters.html
index eb3fc960..c50f0d76 100644
--- a/swh/web/templates/admin/origin-save/filters.html
+++ b/swh/web/save_code_now/templates/admin/origin-save-filters.html
@@ -1,76 +1,76 @@
-{% extends "./common.html" %}
+{% extends "./origin-save-common.html" %}
{% comment %}
Copyright (C) 2018-2022 The Software Heritage developers
See the AUTHORS file at the top-level directory of this distribution
License: GNU Affero General Public License version 3, or any later version
See top-level LICENSE file for more information
{% endcomment %}
{% block tab_content %}
{% endblock %}
diff --git a/swh/web/templates/admin/origin-save/requests.html b/swh/web/save_code_now/templates/admin/origin-save-requests.html
similarity index 99%
rename from swh/web/templates/admin/origin-save/requests.html
rename to swh/web/save_code_now/templates/admin/origin-save-requests.html
index 8ea2dad1..3a115311 100644
--- a/swh/web/templates/admin/origin-save/requests.html
+++ b/swh/web/save_code_now/templates/admin/origin-save-requests.html
@@ -1,93 +1,93 @@
-{% extends "./common.html" %}
+{% extends "./origin-save-common.html" %}
{% comment %}
Copyright (C) 2018-2022 The Software Heritage developers
See the AUTHORS file at the top-level directory of this distribution
License: GNU Affero General Public License version 3, or any later version
See top-level LICENSE file for more information
{% endcomment %}
{% block tab_content %}
{% endblock %}
diff --git a/swh/web/templates/misc/origin-save-help.html b/swh/web/save_code_now/templates/origin-save-help.html
similarity index 100%
rename from swh/web/templates/misc/origin-save-help.html
rename to swh/web/save_code_now/templates/origin-save-help.html
diff --git a/swh/web/templates/misc/origin-save-list.html b/swh/web/save_code_now/templates/origin-save-list.html
similarity index 100%
rename from swh/web/templates/misc/origin-save-list.html
rename to swh/web/save_code_now/templates/origin-save-list.html
diff --git a/swh/web/templates/misc/origin-save.html b/swh/web/save_code_now/templates/origin-save.html
similarity index 99%
rename from swh/web/templates/misc/origin-save.html
rename to swh/web/save_code_now/templates/origin-save.html
index feecc0ec..04ee73f9 100644
--- a/swh/web/templates/misc/origin-save.html
+++ b/swh/web/save_code_now/templates/origin-save.html
@@ -1,88 +1,88 @@
-{% extends "../layout.html" %}
+{% extends "layout.html" %}
{% comment %}
Copyright (C) 2018-2021 The Software Heritage developers
See the AUTHORS file at the top-level directory of this distribution
License: GNU Affero General Public License version 3, or any later version
See top-level LICENSE file for more information
{% endcomment %}
{% load render_bundle from webpack_loader %}
{% load static %}
{% block title %}{{ heading }} – Software Heritage archive{% endblock %}
{% block header %}
{% render_bundle 'save' %}
{% endblock %}
{% block navbar-content %}
Save code now
{% endblock %}
{% block content %}
You can contribute to extend the content of the Software Heritage archive by submitting an origin
save request. To do so, fill the required info in the form below:
{% endfor %}
{% if snapshot_context.branches|length < snapshot_context.snapshot_sizes.revision %}
Branches list truncated to {{ snapshot_context.branches|length }} entries,
{{ snapshot_context.branches|length|mul:-1|add:snapshot_context.snapshot_sizes.revision }}
were omitted.
{% endif %}
{% if snapshot_context.releases %}
{% for r in snapshot_context.releases %}
{% if r.target_type == 'revision' or r.target_type == 'directory' %}
Releases list truncated to {{ snapshot_context.releases|length }} entries,
{{ snapshot_context.releases|length|mul:-1|add:snapshot_context.snapshot_sizes.release }}
were omitted.
{% endif %}
{% else %}
No releases to show
{% endif %}
{% endif %}
{% endif %}
{% include "includes/breadcrumbs.html" %}
{% if top_right_link %}
{% if top_right_link.icon %}
{% endif %}
{{ top_right_link.text }}
{% endif %}
{% if available_languages %}
{% endif %}
{% if show_actions %}
{% if not snapshot_context or not snapshot_context.is_empty %}
{% include "includes/vault-create-tasks.html" %}
{% endif %}
- {% include "includes/take-new-snapshot.html" %}
+ {% if "swh.web.save_code_now" in SWH_DJANGO_APPS %}
+ {% include "includes/take-new-snapshot.html" %}
+ {% endif %}
{% include "includes/show-metadata.html" %}
{% endif %}
{% include "includes/show-swhids.html" %}
diff --git a/swh/web/templates/layout.html b/swh/web/templates/layout.html
index 716708f9..86f89492 100644
--- a/swh/web/templates/layout.html
+++ b/swh/web/templates/layout.html
@@ -1,313 +1,315 @@
{% comment %}
Copyright (C) 2015-2022 The Software Heritage developers
See the AUTHORS file at the top-level directory of this distribution
License: GNU Affero General Public License version 3, or any later version
See top-level LICENSE file for more information
{% endcomment %}
{% load js_reverse %}
{% load static %}
{% load render_bundle from webpack_loader %}
{% load swh_templatetags %}
{% block title %}{% endblock %}
{% render_bundle 'vendors' %}
{% render_bundle 'webapp' %}
{% render_bundle 'guided_tour' %}
{{ request.user.is_authenticated|json_script:"swh_user_logged_in" }}
{% include "includes/favicon.html" %}
{% block header %}{% endblock %}
{% if swh_web_prod %}
{% endif %}