diff --git a/Makefile.local b/Makefile.local --- a/Makefile.local +++ b/Makefile.local @@ -15,10 +15,12 @@ .PHONY: run-migrations run-migrations: python3 swh/web/manage.py migrate 2>/dev/null + python3 swh/web/manage.py createcachetable 2>/dev/null .PHONY: run-migrations-prod run-migrations-prod: django-admin migrate --settings=swh.web.settings.production 2>/dev/null + django-admin createcachetable --settings=swh.web.settings.production 2>/dev/null run-django-webpack-devserver: run-migrations bash -c "trap 'trap - SIGINT SIGTERM ERR; kill %1' SIGINT SIGTERM ERR; npm run start-dev & cd swh/web && python3 manage.py runserver --nostatic" diff --git a/debian/control b/debian/control --- a/debian/control +++ b/debian/control @@ -27,7 +27,7 @@ python3-yaml, python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), - python3-swh.storage (>= 0.0.121~), + python3-swh.storage (>= 0.0.124~), python3-swh.indexer (>= 0.0.120~), python3-swh.vault (>= 0.0.20~), python3-swh.scheduler (>= 0.0.31~), @@ -39,7 +39,7 @@ Architecture: all Depends: python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), - python3-swh.storage (>= 0.0.121~), + python3-swh.storage (>= 0.0.124~), python3-swh.indexer.storage (>= 0.0.120~), python3-swh.vault (>= 0.0.20~), python3-swh.scheduler (>= 0.0.31~), diff --git a/debian/postinst b/debian/postinst --- a/debian/postinst +++ b/debian/postinst @@ -1,3 +1,5 @@ #!/bin/bash django-admin migrate --settings=swh.web.settings.production +django-admin createcachetable --settings=swh.web.settings.production + diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,6 +1,6 @@ swh.core >= 0.0.40 swh.model >= 0.0.25 -swh.storage >= 0.0.121 +swh.storage >= 0.0.124 swh.vault >= 0.0.20 swh.indexer >= 0.0.120 swh.scheduler >= 0.0.31 \ No newline at end of file diff --git a/swh/web/assets/src/bundles/webapp/webapp.css b/swh/web/assets/src/bundles/webapp/webapp.css --- 
a/swh/web/assets/src/bundles/webapp/webapp.css +++ b/swh/web/assets/src/bundles/webapp/webapp.css @@ -430,8 +430,13 @@ border: none; } +.swh-coverage-col { + padding-left: 10px; + padding-right: 10px; +} + .swh-coverage { - height: 65px; + height: calc(65px + 1em); padding-top: 0.3rem; border: none; } diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -7,8 +7,10 @@ from distutils.util import strtobool +from django.core.cache import caches from django.http import HttpResponse from django.shortcuts import render, redirect +from django.views.decorators.cache import never_cache from swh.web.common import service from swh.web.common.origin_visits import get_origin_visits @@ -20,6 +22,7 @@ get_origin_info, get_snapshot_context ) from swh.web.browse.browseurls import browse_route +from swh.web.misc.coverage import code_providers from .utils.snapshot_context import ( browse_snapshot_directory, browse_snapshot_content, @@ -211,6 +214,53 @@ return HttpResponse(results, content_type='application/json') +@browse_route(r'origin/coverage_count/', + view_name='browse-origin-coverage-count') +@never_cache +def _origin_coverage_count(request): + """Internal browse endpoint to count the number of origins associated + with each code provider declared in the archive coverage list. + As this operation takes some time, we execute it once per day and + cache the results in the database. Cached origin counts are then served. + Cache management is handled in the implementation to avoid sending + the same count query twice to the storage database. 
+ """ + try: + cache = caches['db_cache'] + results = [] + for code_provider in code_providers: + provider_id = code_provider['provider_id'] + url_regexp = code_provider['origin_url_regexp'] + cache_key = '%s_origins_count' % provider_id + prev_cache_key = '%s_origins_prev_count' % provider_id + # get cached origin count + origin_count = cache.get(cache_key, -2) + # cache entry has expired or does not exist + if origin_count == -2: + # mark the origin count as processing + cache.set(cache_key, -1, timeout=10*60) + # execute long count query + origin_count = service.storage.origin_count(url_regexp, + regexp=True) + # cache count result + cache.set(cache_key, origin_count, timeout=24*60*60) + cache.set(prev_cache_key, origin_count, timeout=None) + # origin count is currently processing + elif origin_count == -1: + # return previous count if it exists + origin_count = cache.get(prev_cache_key, -1) + results.append({ + 'provider_id': provider_id, + 'origin_count': origin_count, + 'origin_types': code_provider['origin_types'] + }) + results = json.dumps(results) + except Exception as exc: + return handle_view_exception(request, exc, html_response=False) + + return HttpResponse(results, content_type='application/json') + + @browse_route(r'origin/(?P[0-9]+)/latest_snapshot/', view_name='browse-origin-latest-snapshot') def _origin_latest_snapshot(request, origin_id): diff --git a/swh/web/config.py b/swh/web/config.py --- a/swh/web/config.py +++ b/swh/web/config.py @@ -79,7 +79,8 @@ 'private_api_url': 'https://deposit.softwareheritage.org/1/private/', 'private_api_user': 'swhworker', 'private_api_password': '' - }) + }), + 'coverage_count_origins': ('bool', False) } swhweb_config = {} diff --git a/swh/web/misc/coverage.py b/swh/web/misc/coverage.py --- a/swh/web/misc/coverage.py +++ b/swh/web/misc/coverage.py @@ -6,71 +6,106 @@ from django.shortcuts import render from django.views.decorators.clickjacking import xframe_options_exempt +from swh.web.config import get_config + 
# Current coverage list of the archive # TODO: Retrieve that list dynamically instead of hardcoding it -_code_providers = [ +code_providers = [ { + 'provider_id': 'debian', 'provider_url': 'https://www.debian.org/', 'provider_logo': 'img/logos/debian.png', 'provider_info': 'source packages from the Debian distribution ' '(continuously archived)', + 'origin_url_regexp': '^deb://', + 'origin_types': 'packages', }, { + 'provider_id': 'framagit', 'provider_url': 'https://framagit.org/', 'provider_logo': 'img/logos/framagit.png', 'provider_info': 'public repositories from Framagit ' '(continuously archived)', + 'origin_url_regexp': '^https://framagit.org/', + 'origin_types': 'repositories', }, { + 'provider_id': 'github', 'provider_url': 'https://github.com', 'provider_logo': 'img/logos/github.png', 'provider_info': 'public repositories from GitHub ' '(continuously archived)', + 'origin_url_regexp': '^https://github.com/', + 'origin_types': 'repositories', }, { + 'provider_id': 'gitlab', 'provider_url': 'https://gitlab.com', 'provider_logo': 'img/logos/gitlab.svg', 'provider_info': 'public repositories from GitLab ' '(continuously archived)', + 'origin_url_regexp': '^https://gitlab.com/', + 'origin_types': 'repositories', }, { + 'provider_id': 'gitorious', 'provider_url': 'https://gitorious.org/', 'provider_logo': 'img/logos/gitorious.png', 'provider_info': 'public repositories from the former Gitorious code ' 'hosting service', + 'origin_url_regexp': '^https://gitorious.org/', + 'origin_types': 'repositories', }, { + 'provider_id': 'googlecode', 'provider_url': 'https://code.google.com/archive/', 'provider_logo': 'img/logos/googlecode.png', 'provider_info': 'public repositories from the former Google Code ' 'project hosting service', + 'origin_url_regexp': '^http.*.googlecode.com/', + 'origin_types': 'repositories', }, { + 'provider_id': 'gnu', 'provider_url': 'https://www.gnu.org', 'provider_logo': 'img/logos/gnu.png', 'provider_info': 'releases from the GNU project 
(as of August 2015)', + 'origin_url_regexp': '^rsync://ftp.gnu.org/', + 'origin_types': 'releases', }, { + 'provider_id': 'hal', 'provider_url': 'https://hal.archives-ouvertes.fr/', 'provider_logo': 'img/logos/hal.png', 'provider_info': 'scientific software source code deposited in the ' - 'open archive HAL' + 'open archive HAL', + 'origin_url_regexp': '^https://hal.archives-ouvertes.fr/', + 'origin_types': 'deposits', + }, { + 'provider_id': 'inria', 'provider_url': 'https://gitlab.inria.fr', 'provider_logo': 'img/logos/inria.jpg', 'provider_info': 'public repositories from Inria GitLab ' '(continuously archived)', + 'origin_url_regexp': '^https://gitlab.inria.fr/', + 'origin_types': 'repositories', }, { + 'provider_id': 'pypi', 'provider_url': 'https://pypi.org', 'provider_logo': 'img/logos/pypi.svg', 'provider_info': 'source packages from the Python Packaging Index ' '(continuously archived)', + 'origin_url_regexp': '^https://pypi.org/', + 'origin_types': 'packages', }, ] @xframe_options_exempt def swh_coverage(request): - return render(request, 'coverage.html', {'providers': _code_providers}) + count_origins = get_config()['coverage_count_origins'] + return render(request, 'coverage.html', {'providers': code_providers, + 'count_origins': count_origins}) diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -233,3 +233,13 @@ LOGIN_REDIRECT_URL = 'admin' SESSION_ENGINE = 'django.contrib.sessions.backends.cache' + +CACHES = { + 'default': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache' + }, + 'db_cache': { + 'BACKEND': 'django.core.cache.backends.db.DatabaseCache', + 'LOCATION': 'swh_web_cache', + } +} diff --git a/swh/web/settings/production.py b/swh/web/settings/production.py --- a/swh/web/settings/production.py +++ b/swh/web/settings/production.py @@ -24,7 +24,7 @@ MIDDLEWARE += ['swh.web.common.middlewares.HtmlMinifyMiddleware', 
'django.middleware.cache.FetchFromCacheMiddleware'] -CACHES = { +CACHES.update({ 'default': { 'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache', 'LOCATION': swh_web_config['throttling']['cache_uri'], diff --git a/swh/web/templates/coverage.html b/swh/web/templates/coverage.html --- a/swh/web/templates/coverage.html +++ b/swh/web/templates/coverage.html @@ -18,6 +18,7 @@ Software Heritage archive coverage {% render_bundle 'vendors' %} {% render_bundle 'webapp' %} + @@ -27,11 +28,12 @@
{% for provider in providers %} -
-
+
+
+
{% endfor %} @@ -40,4 +42,26 @@
+ {% if count_origins %} + + {% endif %}