diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py index 5eb068d7..7a55a06a 100644 --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -1,308 +1,290 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from distutils.util import strtobool from django.core.cache import caches from django.http import HttpResponse from django.shortcuts import render, redirect from django.views.decorators.cache import never_cache from swh.web.common import service from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import ( reverse, format_utc_iso_date, parse_timestamp ) from swh.web.common.exc import handle_view_exception from swh.web.browse.utils import ( get_origin_info, get_snapshot_context ) from swh.web.browse.browseurls import browse_route from swh.web.misc.coverage import code_providers from .utils.snapshot_context import ( browse_snapshot_directory, browse_snapshot_content, browse_snapshot_log, browse_snapshot_branches, browse_snapshot_releases ) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)' '/visit/(?P.+)/directory/', r'origin/(?P[a-z]+)/url/(?P.+)' '/visit/(?P.+)/directory/(?P.+)/', r'origin/(?P[a-z]+)/url/(?P.+)' '/directory/', r'origin/(?P[a-z]+)/url/(?P.+)' '/directory/(?P.+)/', r'origin/(?P.+)/visit/(?P.+)/directory/', r'origin/(?P.+)/visit/(?P.+)' '/directory/(?P.+)/', r'origin/(?P.+)/directory/', r'origin/(?P.+)/directory/(?P.+)/', view_name='browse-origin-directory') def origin_directory_browse(request, origin_url, origin_type=None, timestamp=None, path=None): """Django view for browsing the content of a directory associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/directory/[(path)/]` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/directory/[(path)/]` """ # noqa return browse_snapshot_directory( request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp, path=path) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)' '/visit/(?P.+)/content/(?P.+)/', r'origin/(?P[a-z]+)/url/(?P.+)' '/content/(?P.+)/', r'origin/(?P.+)/visit/(?P.+)' '/content/(?P.+)/', r'origin/(?P.+)/content/(?P.+)/', view_name='browse-origin-content') def origin_content_browse(request, origin_url, origin_type=None, path=None, timestamp=None): """Django view that produces an HTML display of a content associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/content/(path)/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/content/(path)/` """ # noqa return browse_snapshot_content(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp, path=path) PER_PAGE = 20 @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)' '/visit/(?P.+)/log/', r'origin/(?P[a-z]+)/url/(?P.+)/log/', r'origin/(?P.+)/visit/(?P.+)/log/', r'origin/(?P.+)/log/', view_name='browse-origin-log') def origin_log_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of revisions history (aka the commit log) associated to a software origin. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/log/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/log/` """ # noqa return browse_snapshot_log(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)' '/visit/(?P.+)/branches/', r'origin/(?P[a-z]+)/url/(?P.+)' '/branches/', r'origin/(?P.+)/visit/(?P.+)/branches/', r'origin/(?P.+)/branches/', view_name='browse-origin-branches') def origin_branches_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of the list of branches associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/branches/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/branches/` """ # noqa return browse_snapshot_branches(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)' '/visit/(?P.+)/releases/', r'origin/(?P[a-z]+)/url/(?P.+)' '/releases/', r'origin/(?P.+)/visit/(?P.+)/releases/', r'origin/(?P.+)/releases/', view_name='browse-origin-releases') def origin_releases_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of the list of releases associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/releases/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/releases/` """ # noqa return browse_snapshot_releases(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visits/', r'origin/(?P.+)/visits/', view_name='browse-origin-visits') def origin_visits_browse(request, origin_url, origin_type=None): """Django view that produces an HTML display of visits reporting for a swh origin identified by its id or its url. The url that points to it is :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visits/`. """ try: origin_info = get_origin_info(origin_url, origin_type) origin_visits = get_origin_visits(origin_info) snapshot_context = get_snapshot_context(origin_type=origin_type, origin_url=origin_url) except Exception as exc: return handle_view_exception(request, exc) for i, visit in enumerate(origin_visits): url_date = format_utc_iso_date(visit['date'], '%Y-%m-%dT%H:%M:%SZ') visit['fmt_date'] = format_utc_iso_date(visit['date']) query_params = {} if i < len(origin_visits) - 1: if visit['date'] == origin_visits[i+1]['date']: query_params = {'visit_id': visit['visit']} if i > 0: if visit['date'] == origin_visits[i-1]['date']: query_params = {'visit_id': visit['visit']} snapshot = visit['snapshot'] if visit['snapshot'] else '' visit['browse_url'] = reverse('browse-origin-directory', url_args={'origin_type': origin_type, 'origin_url': origin_url, 'timestamp': url_date}, query_params=query_params) if not snapshot: visit['snapshot'] = '' visit['date'] = parse_timestamp(visit['date']).timestamp() heading = 'Origin visits - %s' % origin_url return render(request, 'browse/origin-visits.html', {'heading': heading, 'swh_object_name': 'Visits', 'swh_object_metadata': origin_info, 'origin_visits': origin_visits, 'origin_info': origin_info, 'snapshot_context': snapshot_context, 'vault_cooking': None, 'show_actions_menu': False}) @browse_route(r'origin/search/(?P.+)/', view_name='browse-origin-search') def _origin_search(request, url_pattern): """Internal browse endpoint to search for origins whose urls contain a provided string pattern or match a provided regular expression. The search is performed in a case insensitive way. """ offset = int(request.GET.get('offset', '0')) limit = int(request.GET.get('limit', '50')) regexp = request.GET.get('regexp', 'false') with_visit = request.GET.get('with_visit', 'false') url_pattern = url_pattern.replace('///', '\\') try: results = service.search_origin(url_pattern, offset, limit, bool(strtobool(regexp)), bool(strtobool(with_visit))) results = json.dumps(list(results), sort_keys=True, indent=4, separators=(',', ': ')) except Exception as exc: return handle_view_exception(request, exc, html_response=False) return HttpResponse(results, content_type='application/json') @browse_route(r'origin/coverage_count/', view_name='browse-origin-coverage-count') @never_cache def _origin_coverage_count(request): """Internal browse endpoint to count the number of origins associated to each code provider declared in the archive coverage list. As this operation takes some times, we execute it once per day and cache its results to database. The cached origin counts are then served. Cache management is handled in the implementation to avoid sending the same count query twice to the storage database. """ try: cache = caches['db_cache'] results = [] for code_provider in code_providers: provider_id = code_provider['provider_id'] url_regexp = code_provider['origin_url_regexp'] cache_key = '%s_origins_count' % provider_id prev_cache_key = '%s_origins_prev_count' % provider_id # get cached origin count origin_count = cache.get(cache_key, -2) # cache entry has expired or does not exist if origin_count == -2: # mark the origin count as processing cache.set(cache_key, -1, timeout=10*60) # execute long count query origin_count = service.storage.origin_count(url_regexp, regexp=True) # cache count result cache.set(cache_key, origin_count, timeout=24*60*60) cache.set(prev_cache_key, origin_count, timeout=None) # origin count is currently processing elif origin_count == -1: # return previous count if it exists origin_count = cache.get(prev_cache_key, -1) results.append({ 'provider_id': provider_id, 'origin_count': origin_count, 'origin_types': code_provider['origin_types'] }) results = json.dumps(results) except Exception as exc: return handle_view_exception(request, exc, html_response=False) return HttpResponse(results, content_type='application/json') -@browse_route(r'origin/(?P[0-9]+)/latest_snapshot/', - view_name='browse-origin-latest-snapshot') -def _origin_latest_snapshot(request, origin_id): - """ - Internal browse endpoint used to check if an origin has already - been visited by Software Heritage and has at least one full visit. - """ - result = \ - service.lookup_latest_origin_snapshot(int(origin_id), - allowed_statuses=['full', - 'partial']) - - result = json.dumps(result, sort_keys=True, indent=4, - separators=(',', ': ')) - - return HttpResponse(result, content_type='application/json') - - @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/', r'origin/(?P.+)/', view_name='browse-origin') def origin_browse(request, origin_url, origin_type=None): """Django view that redirects to the display of the latest archived snapshot for a given software origin. """ last_snapshot_url = reverse('browse-origin-directory', url_args={'origin_type': origin_type, 'origin_url': origin_url}) return redirect(last_snapshot_url)