diff --git a/Makefile.local b/Makefile.local index 81c20fa7..8501fee3 100644 --- a/Makefile.local +++ b/Makefile.local @@ -1,41 +1,43 @@ TEST_DIRS := ./swh/web/tests .PHONY: build-webpack-dev build-webpack-dev: npm run build-dev .PHONY: build-webpack-dev-no-verbose build-webpack-dev-no-verbose: npm run build-dev >/dev/null .PHONY: build-webpack-prod build-webpack-prod: npm run build .PHONY: run-migrations run-migrations: python3 swh/web/manage.py migrate 2>/dev/null + python3 swh/web/manage.py createcachetable 2>/dev/null .PHONY: run-migrations-prod run-migrations-prod: django-admin migrate --settings=swh.web.settings.production 2>/dev/null + django-admin createcachetable --settings=swh.web.settings.production 2>/dev/null run-django-webpack-devserver: run-migrations bash -c "trap 'trap - SIGINT SIGTERM ERR; kill %1' SIGINT SIGTERM ERR; npm run start-dev & cd swh/web && python3 manage.py runserver --nostatic" run-django-webpack-dev: build-webpack-dev run-migrations python3 swh/web/manage.py runserver --nostatic run-django-webpack-prod: build-webpack-prod run-migrations-prod python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.production run-django-server-dev: run-migrations python3 swh/web/manage.py runserver --nostatic run-django-server-prod: run-migrations-prod python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.production run-gunicorn-server: run-migrations gunicorn3 -b 127.0.0.1:5004 swh.web.wsgi diff --git a/debian/control b/debian/control index bd7fc3ac..3a065eb2 100644 --- a/debian/control +++ b/debian/control @@ -1,48 +1,48 @@ Source: swh-web Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: curl, debhelper (>= 9), dh-python (>= 2), python3-all, python3-bs4, python3-django (>= 1.10.7~), python3-djangorestframework (>= 3.4.0~), python3-django-webpack-loader, python3-django-js-reverse, python3-docutils, python3-htmlmin, python3-hypothesis (>= 3.11.0~), python3-magic (>= 0.3.0~), python3-lxml, python3-pytest, python3-pytest-django, python3-pygments, python3-pypandoc, python3-requests, python3-setuptools, python3-sphinx, python3-sphinxcontrib.httpdomain, python3-yaml, python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), - python3-swh.storage (>= 0.0.121~), + python3-swh.storage (>= 0.0.124~), python3-swh.indexer (>= 0.0.120~), python3-swh.vault (>= 0.0.20~), python3-swh.scheduler (>= 0.0.31~), python3-swh.loader.git (>= 0.0.47~) Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DWUI/ Package: python3-swh.web Architecture: all Depends: python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), - python3-swh.storage (>= 0.0.121~), + python3-swh.storage (>= 0.0.124~), python3-swh.indexer.storage (>= 0.0.120~), python3-swh.vault (>= 0.0.20~), python3-swh.scheduler (>= 0.0.31~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Web Applications diff --git a/debian/postinst b/debian/postinst index aa14081e..b8f8a547 100755 --- a/debian/postinst +++ b/debian/postinst @@ -1,3 +1,5 @@ #!/bin/bash django-admin migrate --settings=swh.web.settings.production +django-admin createcachetable --settings=swh.web.settings.production + diff --git a/requirements-swh.txt b/requirements-swh.txt index 849a14a1..e497fbc0 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,6 +1,6 @@ swh.core >= 0.0.40 swh.model >= 0.0.25 -swh.storage >= 0.0.121 +swh.storage >= 0.0.124 swh.vault >= 0.0.20 swh.indexer >= 0.0.120 swh.scheduler >= 0.0.31 \ No newline at end of file diff --git a/swh/web/assets/src/bundles/webapp/webapp.css b/swh/web/assets/src/bundles/webapp/webapp.css index aa3b654d..dfe18e56 100644 --- a/swh/web/assets/src/bundles/webapp/webapp.css +++ b/swh/web/assets/src/bundles/webapp/webapp.css @@ -1,471 +1,476 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ html { height: 100%; overflow-x: hidden; } body { min-height: 100%; margin: 0; position: relative; padding-bottom: 120px; } a { border-bottom-style: none; outline: none; } code { background-color: #f9f2f4; } pre code { background-color: transparent; } footer { background-color: #262626; color: #fff; font-size: 0.8rem; position: absolute; bottom: 0; width: 100%; padding-top: 20px; padding-bottom: 20px; } footer a, footer a:visited { color: #fecd1b; } footer a:hover { text-decoration: underline; } .link-color { color: #fecd1b; } pre { background-color: #f5f5f5; border: 1px solid #ccc; border-radius: 4px; padding: 9.5px; font-size: 0.8rem; } .btn.active { background-color: #e7e7e7; } .card { margin-bottom: 5px !important; overflow-x: auto; } .navbar-brand { padding: 5px; margin-right: 0; } .table { margin-bottom: 0; } .swh-table thead { background-color: #f2f4f5; border-top: 1px solid rgba(0, 0, 0, 0.2); font-weight: normal; } .swh-table-striped th { border-top: none; } .swh-table-striped tbody tr:nth-child(even) { background-color: #f2f4f5; } .swh-table-striped tbody tr:nth-child(odd) { background-color: #fff; } .swh-web-app-link a { text-decoration: none; outline: none; border: none; } .swh-web-app-link:hover { background-color: #efeff2; } .table > thead > tr > th { border-top: none; border-bottom: 1px solid #e20026; } .table > tbody > tr > td { border-style: none; } .sitename .first-word, .sitename .second-word { color: rgba(0, 0, 0, 0.75); font-weight: normal; font-size: 1.2rem; } .sitename .first-word { font-family: 'Alegreya Sans', sans-serif; } .sitename .second-word { font-family: 'Alegreya', serif; } .swh-counter { font-size: 150%; } .swh-http-error { margin: 0 auto; text-align: center; } .swh-http-error-head { color: #2d353c; font-size: 30px; } .swh-http-error-code { bottom: 60%; color: #2d353c; font-size: 96px; line-height: 80px; margin-bottom: 10px !important; } .swh-http-error-desc { font-size: 12px; color: #647788; text-align: center; } .swh-http-error-desc pre { display: inline-block; text-align: left; max-width: 800px; white-space: pre-wrap; } .popover { max-width: 100%; z-index: 2000; } .modal { text-align: center; padding: 0 !important; } .modal::before { content: ''; display: inline-block; height: 100%; vertical-align: middle; margin-right: -4px; } .modal-dialog { display: inline-block; text-align: left; vertical-align: middle; } .dropdown-submenu { position: relative; } .dropdown-submenu .dropdown-menu { top: 0; left: -100%; margin-top: -5px; margin-left: -2px; } .dropdown-item:hover, .dropdown-item:focus { background-color: rgba(0, 0, 0, 0.1); } a.dropdown-left::before { content: "\f0d9"; font-family: 'FontAwesome'; display: block; width: 20px; height: 20px; float: left; margin-left: 0; } #swh-navbar { border-top-style: none; border-left-style: none; border-right-style: none; border-bottom-style: solid; border-bottom-width: 5px; border-image: linear-gradient(to right, rgb(226, 0, 38) 0%, rgb(254, 205, 27) 100%) 1 1 1 1; width: 100%; padding: 5px; margin-bottom: 10px; margin-top: 30px; justify-content: normal; flex-wrap: nowrap; height: 72px; overflow: hidden; } #back-to-top { display: initial; position: fixed; bottom: 30px; right: 30px; z-index: 10; } #back-to-top a img { display: block; width: 32px; height: 32px; background-size: 32px 32px; text-indent: -999px; overflow: hidden; } .swh-top-bar { direction: ltr; height: 30px; position: fixed; top: 0; left: 0; width: 100%; z-index: 99999; background-color: #262626; color: #fff; text-align: center; font-size: 14px; } .swh-top-bar ul { margin-top: 4px; padding-left: 0; white-space: nowrap; } .swh-top-bar li { display: inline-block; margin-left: 10px; margin-right: 10px; } .swh-top-bar a, .swh-top-bar a:visited { color: white; } .swh-top-bar a.swh-current-site, .swh-top-bar a.swh-current-site:visited { color: #fecd1b; } .swh-position-right { position: absolute; right: 0; } .swh-donate-link { border: 1px solid #fecd1b; background-color: #e20026; color: white !important; padding: 3px; border-radius: 3px; } .swh-navbar-content h4 { padding-top: 7px; } .swh-navbar-content .bread-crumbs { display: block; margin-left: -40px; } .swh-navbar-content .bread-crumbs li.bc-no-root { padding-top: 7px; } .main-sidebar { margin-top: 30px; } .content-wrapper { background: none; } .brand-image { max-height: 40px; } .brand-link { padding-top: 18.5px; padding-bottom: 18px; padding-left: 4px; border-bottom: 5px solid #e20026 !important; } .navbar-header a, ul.dropdown-menu a, ul.navbar-nav a, ul.nav-sidebar a { border-bottom-style: none; color: #323232; } .swh-sidebar .nav-link.active { color: #323232 !important; background-color: #e7e7e7 !important; } .swh-image-error { width: 80px; height: auto; } @media (max-width: 600px) { .swh-image-error { width: 40px; height: auto; } .swh-navbar-content h4 { font-size: 1rem; } } .form-check-label { padding-top: 4px; } .swh-id-option { display: inline-block; margin-right: 5px; } .nav-pills .nav-link:not(.active):hover { color: rgba(0, 0, 0, 0.55); } .swh-heading-color { color: #e20026 !important; } .sidebar-mini.sidebar-collapse .main-sidebar:hover { width: 4.6rem; } .sidebar-mini.sidebar-collapse .main-sidebar:hover .user-panel > .info, .sidebar-mini.sidebar-collapse .main-sidebar:hover .nav-sidebar .nav-link p, .sidebar-mini.sidebar-collapse .main-sidebar:hover .brand-text { visibility: hidden !important; } .sidebar .nav-link p, .main-sidebar .brand-text, .sidebar .user-panel .info { transition: none; } .sidebar-mini.sidebar-mini.sidebar-collapse .sidebar { padding-right: 0; } .swh-words-logo { position: absolute; top: 0; left: 0; width: 73px; height: 73px; text-align: center; font-size: 10pt; color: rgba(0, 0, 0, 0.75); } .swh-words-logo:hover { text-decoration: none; } .swh-words-logo-swh { line-height: 1; padding-top: 13px; visibility: hidden; } hr.swh-faded-line { border: 0; height: 1px; background-image: linear-gradient(to left, #f0f0f0, #8c8b8b, #f0f0f0); } .swh-readme-txt pre { background: none; border: none; } +.swh-coverage-col { + padding-left: 10px; + padding-right: 10px; +} + .swh-coverage { - height: 65px; + height: calc(65px + 1em); padding-top: 0.3rem; border: none; } .swh-coverage a { text-decoration: none; } .swh-coverage-logo { display: block; width: 100%; height: 50px; margin-left: auto; margin-right: auto; object-fit: contain; /* polyfill for old browsers, see https://github.com/bfred-it/object-fit-images */ font-family: 'object-fit: contain;'; } .swh-coverage-list { width: 100%; height: 320px; border: none; } tr.swh-tr-hover-highlight { cursor: pointer; } tr.swh-tr-hover-highlight:hover td { background: #ededed; } tr.swh-api-doc-route a { text-decoration: none; } diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py index f82cd5b6..0550d760 100644 --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -1,242 +1,292 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from distutils.util import strtobool +from django.core.cache import caches from django.http import HttpResponse from django.shortcuts import render, redirect +from django.views.decorators.cache import never_cache from swh.web.common import service from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import ( reverse, format_utc_iso_date, parse_timestamp ) from swh.web.common.exc import handle_view_exception from swh.web.browse.utils import ( get_origin_info, get_snapshot_context ) from swh.web.browse.browseurls import browse_route +from swh.web.misc.coverage import code_providers from .utils.snapshot_context import ( browse_snapshot_directory, browse_snapshot_content, browse_snapshot_log, browse_snapshot_branches, browse_snapshot_releases ) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/directory/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/directory/(?P.+)/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/directory/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/directory/(?P.+)/', # noqa r'origin/(?P.+)/visit/(?P.+)/directory/', # noqa r'origin/(?P.+)/visit/(?P.+)/directory/(?P.+)/', # noqa r'origin/(?P.+)/directory/', # noqa r'origin/(?P.+)/directory/(?P.+)/', # noqa view_name='browse-origin-directory') def origin_directory_browse(request, origin_url, origin_type=None, timestamp=None, path=None): """Django view for browsing the content of a directory associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/directory/[(path)/]` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/directory/[(path)/]` """ # noqa return browse_snapshot_directory( request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp, path=path) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/content/(?P.+)/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/content/(?P.+)/', # noqa r'origin/(?P.+)/visit/(?P.+)/content/(?P.+)/', # noqa r'origin/(?P.+)/content/(?P.+)/', # noqa view_name='browse-origin-content') def origin_content_browse(request, origin_url, origin_type=None, path=None, timestamp=None): """Django view that produces an HTML display of a content associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/content/(path)/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/content/(path)/` """ # noqa return browse_snapshot_content(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp, path=path) PER_PAGE = 20 @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/log/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/log/', r'origin/(?P.+)/visit/(?P.+)/log/', # noqa r'origin/(?P.+)/log/', view_name='browse-origin-log') def origin_log_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of revisions history (aka the commit log) associated to a software origin. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/log/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/log/` """ # noqa return browse_snapshot_log(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/branches/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/branches/', # noqa r'origin/(?P.+)/visit/(?P.+)/branches/', # noqa r'origin/(?P.+)/branches/', # noqa view_name='browse-origin-branches') def origin_branches_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of the list of branches associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/branches/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/branches/` """ # noqa return browse_snapshot_branches(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/releases/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/releases/', # noqa r'origin/(?P.+)/visit/(?P.+)/releases/', # noqa r'origin/(?P.+)/releases/', # noqa view_name='browse-origin-releases') def origin_releases_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of the list of releases associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/releases/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/releases/` """ # noqa return browse_snapshot_releases(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visits/', r'origin/(?P.+)/visits/', view_name='browse-origin-visits') def origin_visits_browse(request, origin_url, origin_type=None): """Django view that produces an HTML display of visits reporting for a swh origin identified by its id or its url. The url that points to it is :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visits/`. """ # noqa try: origin_info = get_origin_info(origin_url, origin_type) origin_visits = get_origin_visits(origin_info) snapshot_context = get_snapshot_context(origin_type=origin_type, origin_url=origin_url) except Exception as exc: return handle_view_exception(request, exc) for i, visit in enumerate(origin_visits): url_date = format_utc_iso_date(visit['date'], '%Y-%m-%dT%H:%M:%SZ') visit['fmt_date'] = format_utc_iso_date(visit['date']) query_params = {} if i < len(origin_visits) - 1: if visit['date'] == origin_visits[i+1]['date']: query_params = {'visit_id': visit['visit']} if i > 0: if visit['date'] == origin_visits[i-1]['date']: query_params = {'visit_id': visit['visit']} snapshot = visit['snapshot'] if visit['snapshot'] else '' visit['browse_url'] = reverse('browse-origin-directory', url_args={'origin_type': origin_type, 'origin_url': origin_url, 'timestamp': url_date}, query_params=query_params) if not snapshot: visit['snapshot'] = '' visit['date'] = parse_timestamp(visit['date']).timestamp() heading = 'Origin visits - %s' % origin_url return render(request, 'browse/origin-visits.html', {'heading': heading, 'swh_object_name': 'Visits', 'swh_object_metadata': origin_info, 'origin_visits': origin_visits, 'origin_info': origin_info, 'snapshot_context': snapshot_context, 'vault_cooking': None, 'show_actions_menu': False}) @browse_route(r'origin/search/(?P.+)/', view_name='browse-origin-search') def _origin_search(request, url_pattern): """Internal browse endpoint to search for origins whose urls contain a provided string pattern or match a provided regular expression. The search is performed in a case insensitive way. """ offset = int(request.GET.get('offset', '0')) limit = int(request.GET.get('limit', '50')) regexp = request.GET.get('regexp', 'false') with_visit = request.GET.get('with_visit', 'false') url_pattern = url_pattern.replace('///', '\\') try: results = service.search_origin(url_pattern, offset, limit, bool(strtobool(regexp)), bool(strtobool(with_visit))) results = json.dumps(list(results), sort_keys=True, indent=4, separators=(',', ': ')) except Exception as exc: return handle_view_exception(request, exc, html_response=False) return HttpResponse(results, content_type='application/json') +@browse_route(r'origin/coverage_count/', + view_name='browse-origin-coverage-count') +@never_cache +def _origin_coverage_count(request): + """Internal browse endpoint to count the number of origins associated + to each code provider declared in the archive coverage list. + As this operation takes some times, we execute it once per day and + cache its results to database. The cached origin counts are then served. + Cache management is handled in the implementation to avoid sending + the same count query twice to the storage database. + """ + try: + cache = caches['db_cache'] + results = [] + for code_provider in code_providers: + provider_id = code_provider['provider_id'] + url_regexp = code_provider['origin_url_regexp'] + cache_key = '%s_origins_count' % provider_id + prev_cache_key = '%s_origins_prev_count' % provider_id + # get cached origin count + origin_count = cache.get(cache_key, -2) + # cache entry has expired or does not exist + if origin_count == -2: + # mark the origin count as processing + cache.set(cache_key, -1, timeout=10*60) + # execute long count query + origin_count = service.storage.origin_count(url_regexp, + regexp=True) + # cache count result + cache.set(cache_key, origin_count, timeout=24*60*60) + cache.set(prev_cache_key, origin_count, timeout=None) + # origin count is currently processing + elif origin_count == -1: + # return previous count if it exists + origin_count = cache.get(prev_cache_key, -1) + results.append({ + 'provider_id': provider_id, + 'origin_count': origin_count, + 'origin_types': code_provider['origin_types'] + }) + results = json.dumps(results) + except Exception as exc: + return handle_view_exception(request, exc, html_response=False) + + return HttpResponse(results, content_type='application/json') + + @browse_route(r'origin/(?P[0-9]+)/latest_snapshot/', view_name='browse-origin-latest-snapshot') def _origin_latest_snapshot(request, origin_id): """ Internal browse endpoint used to check if an origin has already been visited by Software Heritage and has at least one full visit. """ result = \ service.lookup_latest_origin_snapshot(origin_id, allowed_statuses=['full', 'partial']) result = json.dumps(result, sort_keys=True, indent=4, separators=(',', ': ')) return HttpResponse(result, content_type='application/json') @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/', r'origin/(?P.+)/', view_name='browse-origin') def origin_browse(request, origin_url, origin_type=None): """Django view that redirects to the display of the latest archived snapshot for a given software origin. """ # noqa last_snapshot_url = reverse('browse-origin-directory', url_args={'origin_type': origin_type, 'origin_url': origin_url}) return redirect(last_snapshot_url) diff --git a/swh/web/config.py b/swh/web/config.py index b21e1faa..45c34b76 100644 --- a/swh/web/config.py +++ b/swh/web/config.py @@ -1,131 +1,132 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.core import config from swh.storage import get_storage from swh.indexer.storage import get_indexer_storage from swh.vault.api.client import RemoteVaultClient from swh.scheduler import get_scheduler DEFAULT_CONFIG = { 'allowed_hosts': ('list', []), 'storage': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://127.0.0.1:5002/', 'timeout': 10, }, }), 'indexer_storage': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://127.0.0.1:5007/', 'timeout': 1, } }), 'vault': ('string', 'http://127.0.0.1:5005/'), 'log_dir': ('string', '/tmp/swh/log'), 'debug': ('bool', False), 'serve_assets': ('bool', False), 'host': ('string', '127.0.0.1'), 'port': ('int', 5004), 'secret_key': ('string', 'development key'), # do not display code highlighting for content > 1MB 'content_display_max_size': ('int', 1024 * 1024), 'snapshot_content_max_size': ('int', 1000), 'throttling': ('dict', { 'cache_uri': None, # production: memcached as cache (127.0.0.1:11211) # development: in-memory cache so None 'scopes': { 'swh_api': { 'limiter_rate': { 'default': '120/h' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_vault_cooking': { 'limiter_rate': { 'default': '120/h', 'GET': '60/m' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_save_origin': { 'limiter_rate': { 'default': '120/h', 'POST': '10/h' }, 'exempted_networks': ['127.0.0.0/8'] } } }), 'scheduler': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://localhost:5008/' } }), 'grecaptcha': ('dict', { 'activated': True, 'validation_url': 'https://www.google.com/recaptcha/api/siteverify', 'site_key': '', 'private_key': '' }), 'production_db': ('string', '/var/lib/swh/web.sqlite3'), 'deposit': ('dict', { 'private_api_url': 'https://deposit.softwareheritage.org/1/private/', 'private_api_user': 'swhworker', 'private_api_password': '' - }) + }), + 'coverage_count_origins': ('bool', False) } swhweb_config = {} def get_config(config_file='web/web'): """Read the configuration file `config_file`, update the app with parameters (secret_key, conf) and return the parsed configuration as a dict. If no configuration file is provided, return a default configuration.""" if not swhweb_config: cfg = config.load_named_config(config_file, DEFAULT_CONFIG) swhweb_config.update(cfg) config.prepare_folders(swhweb_config, 'log_dir') swhweb_config['storage'] = get_storage(**swhweb_config['storage']) swhweb_config['vault'] = RemoteVaultClient(swhweb_config['vault']) swhweb_config['indexer_storage'] = \ get_indexer_storage(**swhweb_config['indexer_storage']) swhweb_config['scheduler'] = get_scheduler(**swhweb_config['scheduler']) # noqa return swhweb_config def storage(): """Return the current application's storage. """ return get_config()['storage'] def vault(): """Return the current application's vault. """ return get_config()['vault'] def indexer_storage(): """Return the current application's indexer storage. """ return get_config()['indexer_storage'] def scheduler(): """Return the current application's scheduler. """ return get_config()['scheduler'] diff --git a/swh/web/misc/coverage.py b/swh/web/misc/coverage.py index 0b88c071..0c736919 100644 --- a/swh/web/misc/coverage.py +++ b/swh/web/misc/coverage.py @@ -1,76 +1,111 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from django.views.decorators.clickjacking import xframe_options_exempt +from swh.web.config import get_config + # Current coverage list of the archive # TODO: Retrieve that list dynamically instead of hardcoding it -_code_providers = [ +code_providers = [ { + 'provider_id': 'debian', 'provider_url': 'https://www.debian.org/', 'provider_logo': 'img/logos/debian.png', 'provider_info': 'source packages from the Debian distribution ' '(continuously archived)', + 'origin_url_regexp': '^deb://', + 'origin_types': 'packages', }, { + 'provider_id': 'framagit', 'provider_url': 'https://framagit.org/', 'provider_logo': 'img/logos/framagit.png', 'provider_info': 'public repositories from Framagit ' '(continuously archived)', + 'origin_url_regexp': '^https://framagit.org/', + 'origin_types': 'repositories', }, { + 'provider_id': 'github', 'provider_url': 'https://github.com', 'provider_logo': 'img/logos/github.png', 'provider_info': 'public repositories from GitHub ' '(continuously archived)', + 'origin_url_regexp': '^https://github.com/', + 'origin_types': 'repositories', }, { + 'provider_id': 'gitlab', 'provider_url': 'https://gitlab.com', 'provider_logo': 'img/logos/gitlab.svg', 'provider_info': 'public repositories from GitLab ' '(continuously archived)', + 'origin_url_regexp': '^https://gitlab.com/', + 'origin_types': 'repositories', }, { + 'provider_id': 'gitorious', 'provider_url': 'https://gitorious.org/', 'provider_logo': 'img/logos/gitorious.png', 'provider_info': 'public repositories from the former Gitorious code ' 'hosting service', + 'origin_url_regexp': '^https://gitorious.org/', + 'origin_types': 'repositories', }, { + 'provider_id': 'googlecode', 'provider_url': 'https://code.google.com/archive/', 'provider_logo': 'img/logos/googlecode.png', 'provider_info': 'public repositories from the former Google Code ' 'project hosting service', + 'origin_url_regexp': '^http.*.googlecode.com/', + 'origin_types': 'repositories', }, { + 'provider_id': 'gnu', 'provider_url': 'https://www.gnu.org', 'provider_logo': 'img/logos/gnu.png', 'provider_info': 'releases from the GNU project (as of August 2015)', + 'origin_url_regexp': '^rsync://ftp.gnu.org/', + 'origin_types': 'releases', }, { + 'provider_id': 'hal', 'provider_url': 'https://hal.archives-ouvertes.fr/', 'provider_logo': 'img/logos/hal.png', 'provider_info': 'scientific software source code deposited in the ' - 'open archive HAL' + 'open archive HAL', + 'origin_url_regexp': '^https://hal.archives-ouvertes.fr/', + 'origin_types': 'deposits', + }, { + 'provider_id': 'inria', 'provider_url': 'https://gitlab.inria.fr', 'provider_logo': 'img/logos/inria.jpg', 'provider_info': 'public repositories from Inria GitLab ' '(continuously archived)', + 'origin_url_regexp': '^https://gitlab.inria.fr/', + 'origin_types': 'repositories', }, { + 'provider_id': 'pypi', 'provider_url': 'https://pypi.org', 'provider_logo': 'img/logos/pypi.svg', 'provider_info': 'source packages from the Python Packaging Index ' '(continuously archived)', + 'origin_url_regexp': '^https://pypi.org/', + 'origin_types': 'packages', }, ] @xframe_options_exempt def swh_coverage(request): - return render(request, 'coverage.html', {'providers': _code_providers}) + count_origins = get_config()['coverage_count_origins'] + return render(request, 'coverage.html', {'providers': code_providers, + 'count_origins': count_origins}) diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py index b62e388f..eafb2943 100644 --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -1,235 +1,245 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django common settings for swh-web. """ import os from swh.web.config import get_config swh_web_config = get_config() # Build paths inside the project like this: os.path.join(BASE_DIR, ...) PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = swh_web_config['secret_key'] # SECURITY WARNING: don't run with debug turned on in production! DEBUG = swh_web_config['debug'] DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config['debug'] ALLOWED_HOSTS = ['127.0.0.1', 'localhost'] + swh_web_config['allowed_hosts'] # Application definition INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'rest_framework', 'swh.web.common', 'swh.web.api', 'swh.web.browse', 'webpack_loader', 'django_js_reverse' ] MIDDLEWARE = [ 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', 'swh.web.common.middlewares.ThrottlingHeadersMiddleware' ] # Compress all assets (static ones and dynamically generated html) # served by django in a local development environment context. # In a production environment, assets compression will be directly # handled by web servers like apache or nginx. if swh_web_config['serve_assets']: MIDDLEWARE.insert(0, 'django.middleware.gzip.GZipMiddleware') ROOT_URLCONF = 'swh.web.urls' TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'DIRS': [os.path.join(PROJECT_DIR, "../templates")], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ 'django.template.context_processors.debug', 'django.template.context_processors.request', 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', 'swh.web.common.utils.context_processor' ], 'libraries': { 'swh_templatetags': 'swh.web.common.swh_templatetags', }, }, }, ] WSGI_APPLICATION = 'swh.web.wsgi.application' DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': os.path.join(PROJECT_DIR, 'db.sqlite3'), } } # Password validation # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', # noqa }, ] # Internationalization # https://docs.djangoproject.com/en/1.11/topics/i18n/ LANGUAGE_CODE = 'en-us' TIME_ZONE = 'UTC' USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ STATIC_URL = '/static/' STATICFILES_DIRS = [ os.path.join(PROJECT_DIR, "../static") ] INTERNAL_IPS = ['127.0.0.1'] throttle_rates = {} http_requests = ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'PATCH'] throttling = swh_web_config['throttling'] for limiter_scope, limiter_conf in throttling['scopes'].items(): if 'default' in limiter_conf['limiter_rate']: throttle_rates[limiter_scope] = limiter_conf['limiter_rate']['default'] # for backward compatibility else: throttle_rates[limiter_scope] = limiter_conf['limiter_rate'] # register sub scopes specific for HTTP request types for http_request in http_requests: if http_request in limiter_conf['limiter_rate']: throttle_rates[limiter_scope + '_' + http_request.lower()] = \ limiter_conf['limiter_rate'][http_request] REST_FRAMEWORK = { 'DEFAULT_RENDERER_CLASSES': ( 'rest_framework.renderers.JSONRenderer', 'swh.web.api.renderers.YAMLRenderer', 'rest_framework.renderers.TemplateHTMLRenderer' ), 'DEFAULT_THROTTLE_CLASSES': ( 'swh.web.common.throttling.SwhWebRateThrottle', ), 'DEFAULT_THROTTLE_RATES': throttle_rates } LOGGING = { 'version': 1, 'disable_existing_loggers': False, 'filters': { 'require_debug_false': { '()': 'django.utils.log.RequireDebugFalse', }, 'require_debug_true': { '()': 'django.utils.log.RequireDebugTrue', }, }, 'formatters': { 'verbose': { 'format': '[%(asctime)s] [%(levelname)s] %(request)s %(status_code)s', # noqa 'datefmt': "%d/%b/%Y %H:%M:%S" }, }, 'handlers': { 'console': { 'level': 'DEBUG', 'filters': ['require_debug_true'], 'class': 'logging.StreamHandler', }, 'file': { 'level': 'INFO', 'filters': ['require_debug_false'], 'class': 'logging.FileHandler', 'filename': os.path.join(swh_web_config['log_dir'], 'swh-web.log'), 'formatter': 'verbose' }, 'null': { 'class': 'logging.NullHandler', }, }, 'loggers': { 'django': { 'handlers': ['console', 'file'], 'level': 'DEBUG' if DEBUG else 'INFO', 'propagate': True, }, 'django.request': { 'handlers': ['file'], 'level': 'DEBUG' if DEBUG else 'INFO', 'propagate': False, }, 'django.db.backends': { 'handlers': ['null'], 'propagate': False } }, } WEBPACK_LOADER = { # noqa 'DEFAULT': { 'CACHE': False, 'BUNDLE_DIR_NAME': './', 'STATS_FILE': os.path.join(PROJECT_DIR, '../static/webpack-stats.json'), # noqa 'POLL_INTERVAL': 0.1, 'TIMEOUT': None, 'IGNORE': ['.+\.hot-update.js', '.+\.map'] } } LOGIN_URL = '/admin/login/' LOGIN_REDIRECT_URL = 'admin' SESSION_ENGINE = 'django.contrib.sessions.backends.cache' + +CACHES = { + 'default': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache' + }, + 'db_cache': { + 'BACKEND': 'django.core.cache.backends.db.DatabaseCache', + 'LOCATION': 'swh_web_cache', + } +} diff --git a/swh/web/settings/production.py b/swh/web/settings/production.py index 48357f5b..3987ea03 100644 --- a/swh/web/settings/production.py +++ b/swh/web/settings/production.py @@ -1,54 +1,54 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information # flake8: noqa """ Django production settings for swh-web. """ import os from .common import * from .common import swh_web_config from .common import REST_FRAMEWORK # activate per-site caching if 'GZip' in MIDDLEWARE[0]: MIDDLEWARE.insert(1, 'django.middleware.cache.UpdateCacheMiddleware') else: MIDDLEWARE.insert(0, 'django.middleware.cache.UpdateCacheMiddleware') MIDDLEWARE += ['swh.web.common.middlewares.HtmlMinifyMiddleware', 'django.middleware.cache.FetchFromCacheMiddleware'] -CACHES = { +CACHES.update({ 'default': { 'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache', 'LOCATION': swh_web_config['throttling']['cache_uri'], } } # Setup support for proxy headers USE_X_FORWARDED_HOST = True SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') # We're going through seven (or, in that case, 2) proxies thanks to Varnish REST_FRAMEWORK['NUM_PROXIES'] = 2 ALLOWED_HOSTS += [ 'archive.softwareheritage.org', 'base.softwareheritage.org', 'archive.internal.softwareheritage.org', ] DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': swh_web_config['production_db'], } } WEBPACK_LOADER['DEFAULT']['CACHE'] = True diff --git a/swh/web/templates/coverage.html b/swh/web/templates/coverage.html index 2877a9ef..844e1e98 100644 --- a/swh/web/templates/coverage.html +++ b/swh/web/templates/coverage.html @@ -1,43 +1,67 @@ {% comment %} Copyright (C) 2015-2018 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load static %} {% load render_bundle from webpack_loader %} Software Heritage archive coverage {% render_bundle 'vendors' %} {% render_bundle 'webapp' %} +
{% for provider in providers %} -
-
+
+
+
{% endfor %}
+ {% if count_origins %} + + {% endif %}