Page MenuHomeSoftware Heritage

origin.py
No OneTemporary

origin.py

# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
from distutils.util import strtobool
from django.core.cache import caches
from django.http import HttpResponse
from django.shortcuts import render, redirect
from django.views.decorators.cache import never_cache
from swh.web.common import service
from swh.web.common.origin_visits import get_origin_visits
from swh.web.common.utils import (
reverse, format_utc_iso_date, parse_timestamp
)
from swh.web.common.exc import handle_view_exception
from swh.web.browse.utils import (
get_origin_info, get_snapshot_context
)
from swh.web.browse.browseurls import browse_route
from swh.web.misc.coverage import code_providers
from .utils.snapshot_context import (
browse_snapshot_directory, browse_snapshot_content,
browse_snapshot_log, browse_snapshot_branches,
browse_snapshot_releases
)
@browse_route(r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/visit/(?P<timestamp>.+)/directory/',
              r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/visit/(?P<timestamp>.+)/directory/(?P<path>.+)/',
              r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/directory/',
              r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/directory/(?P<path>.+)/',
              r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/directory/',
              r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)'
              '/directory/(?P<path>.+)/',
              r'origin/(?P<origin_url>.+)/directory/',
              r'origin/(?P<origin_url>.+)/directory/(?P<path>.+)/',
              view_name='browse-origin-directory')
def origin_directory_browse(request, origin_url, origin_type=None,
                            timestamp=None, path=None):
    """Django view used to browse a directory reachable from an origin,
    optionally for a specific visit and sub-path.

    All the work is delegated to ``browse_snapshot_directory``; the
    parameters captured from the url are forwarded unchanged.

    The url scheme that points to it is the following:

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/directory/[(path)/]`

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/directory/[(path)/]`

    Args:
        request: the incoming Django request
        origin_url: url of the origin, captured from the route
        origin_type: optional origin type, captured from the route
        timestamp: optional visit timestamp, captured from the route
        path: optional path of the directory, captured from the route
    """ # noqa
    return browse_snapshot_directory(
        request,
        origin_type=origin_type,
        origin_url=origin_url,
        timestamp=timestamp,
        path=path,
    )
@browse_route(r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/visit/(?P<timestamp>.+)/content/(?P<path>.+)/',
              r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/content/(?P<path>.+)/',
              r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)'
              '/content/(?P<path>.+)/',
              r'origin/(?P<origin_url>.+)/content/(?P<path>.+)/',
              view_name='browse-origin-content')
def origin_content_browse(request, origin_url, origin_type=None, path=None,
                          timestamp=None):
    """Django view used to display a content (file) reachable from an
    origin, optionally for a specific visit.

    All the work is delegated to ``browse_snapshot_content``; the
    parameters captured from the url are forwarded unchanged.

    The url scheme that points to it is the following:

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/content/(path)/`

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/content/(path)/`

    Args:
        request: the incoming Django request
        origin_url: url of the origin, captured from the route
        origin_type: optional origin type, captured from the route
        path: path of the content, captured from the route
        timestamp: optional visit timestamp, captured from the route
    """ # noqa
    return browse_snapshot_content(
        request,
        origin_type=origin_type,
        origin_url=origin_url,
        timestamp=timestamp,
        path=path,
    )
# NOTE(review): not referenced by any view visible in this module;
# presumably a pagination page size -- confirm it is still used elsewhere.
PER_PAGE = 20
@browse_route(r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/visit/(?P<timestamp>.+)/log/',
              r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)/log/',
              r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/log/',
              r'origin/(?P<origin_url>.+)/log/',
              view_name='browse-origin-log')
def origin_log_browse(request, origin_url, origin_type=None, timestamp=None):
    """Django view used to display the revisions history (aka the commit
    log) of a software origin, optionally for a specific visit.

    All the work is delegated to ``browse_snapshot_log``; the parameters
    captured from the url are forwarded unchanged.

    The url scheme that points to it is the following:

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/log/`

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/log/`

    Args:
        request: the incoming Django request
        origin_url: url of the origin, captured from the route
        origin_type: optional origin type, captured from the route
        timestamp: optional visit timestamp, captured from the route
    """ # noqa
    return browse_snapshot_log(
        request,
        origin_type=origin_type,
        origin_url=origin_url,
        timestamp=timestamp,
    )
@browse_route(r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/visit/(?P<timestamp>.+)/branches/',
              r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/branches/',
              r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/branches/',
              r'origin/(?P<origin_url>.+)/branches/',
              view_name='browse-origin-branches')
def origin_branches_browse(request, origin_url, origin_type=None,
                           timestamp=None):
    """Django view used to display the list of branches of an origin,
    optionally for a specific visit.

    All the work is delegated to ``browse_snapshot_branches``; the
    parameters captured from the url are forwarded unchanged.

    The url scheme that points to it is the following:

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/branches/`

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/branches/`

    Args:
        request: the incoming Django request
        origin_url: url of the origin, captured from the route
        origin_type: optional origin type, captured from the route
        timestamp: optional visit timestamp, captured from the route
    """ # noqa
    return browse_snapshot_branches(
        request,
        origin_type=origin_type,
        origin_url=origin_url,
        timestamp=timestamp,
    )
@browse_route(r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/visit/(?P<timestamp>.+)/releases/',
              r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)'
              '/releases/',
              r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/releases/',
              r'origin/(?P<origin_url>.+)/releases/',
              view_name='browse-origin-releases')
def origin_releases_browse(request, origin_url, origin_type=None,
                           timestamp=None):
    """Django view used to display the list of releases of an origin,
    optionally for a specific visit.

    All the work is delegated to ``browse_snapshot_releases``; the
    parameters captured from the url are forwarded unchanged.

    The url scheme that points to it is the following:

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/releases/`

        * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/releases/`

    Args:
        request: the incoming Django request
        origin_url: url of the origin, captured from the route
        origin_type: optional origin type, captured from the route
        timestamp: optional visit timestamp, captured from the route
    """ # noqa
    return browse_snapshot_releases(
        request,
        origin_type=origin_type,
        origin_url=origin_url,
        timestamp=timestamp,
    )
@browse_route(r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)/visits/',
              r'origin/(?P<origin_url>.+)/visits/',
              view_name='browse-origin-visits')
def origin_visits_browse(request, origin_url, origin_type=None):
    """Django view that produces an HTML display of visits reporting
    for a swh origin identified by its id or its url.

    The url that points to it is
    :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visits/`.

    Each visit dict returned by ``get_origin_visits`` is enriched in place
    with a ``fmt_date`` and a ``browse_url`` entry, an empty ``snapshot``
    entry is normalized to ``''`` and the ``date`` entry is replaced by
    the matching timestamp before the template is rendered.

    Args:
        request: the incoming Django request
        origin_url: url of the origin, captured from the route
        origin_type: optional origin type, captured from the route

    Returns:
        The rendered ``browse/origin-visits.html`` template, or the
        response produced by ``handle_view_exception`` when the origin
        data cannot be retrieved.
    """
    try:
        origin_info = get_origin_info(origin_url, origin_type)
        origin_visits = get_origin_visits(origin_info)
        snapshot_context = get_snapshot_context(origin_type=origin_type,
                                                origin_url=origin_url)
    except Exception as exc:
        return handle_view_exception(request, exc)

    # Keep the dates as returned by get_origin_visits: visit['date'] is
    # overwritten with a timestamp at the end of each iteration, so
    # comparing against the previous visit's entry would otherwise never
    # match and the last visit of a same-date run would lose its visit_id.
    visit_dates = [visit['date'] for visit in origin_visits]
    last_index = len(origin_visits) - 1

    for i, visit in enumerate(origin_visits):
        visit_date = visit_dates[i]
        url_date = format_utc_iso_date(visit_date, '%Y-%m-%dT%H:%M:%SZ')
        visit['fmt_date'] = format_utc_iso_date(visit_date)

        # visits sharing their date with a neighbour cannot be told apart
        # by timestamp alone: disambiguate them through their visit id
        same_as_next = i < last_index and visit_date == visit_dates[i+1]
        same_as_prev = i > 0 and visit_date == visit_dates[i-1]
        query_params = {}
        if same_as_next or same_as_prev:
            query_params = {'visit_id': visit['visit']}

        visit['browse_url'] = reverse('browse-origin-directory',
                                      url_args={'origin_type': origin_type,
                                                'origin_url': origin_url,
                                                'timestamp': url_date},
                                      query_params=query_params)
        if not visit['snapshot']:
            visit['snapshot'] = ''
        visit['date'] = parse_timestamp(visit_date).timestamp()

    heading = 'Origin visits - %s' % origin_url

    return render(request, 'browse/origin-visits.html',
                  {'heading': heading,
                   'swh_object_name': 'Visits',
                   'swh_object_metadata': origin_info,
                   'origin_visits': origin_visits,
                   'origin_info': origin_info,
                   'snapshot_context': snapshot_context,
                   'vault_cooking': None,
                   'show_actions_menu': False})
@browse_route(r'origin/search/(?P<url_pattern>.+)/',
              view_name='browse-origin-search')
def _origin_search(request, url_pattern):
    """Internal browse endpoint to search for origins whose urls contain
    a provided string pattern or match a provided regular expression.
    The search is performed in a case insensitive way.

    Query parameters read from the request:

        * ``offset`` (default ``0``) and ``limit`` (default ``50``),
          parsed as integers
        * ``regexp`` and ``with_visit`` (default ``false``), parsed with
          ``strtobool``

    Args:
        request: the incoming Django request
        url_pattern: the search pattern, captured from the route

    Returns:
        A JSON response holding the list of results, or the response
        produced by ``handle_view_exception`` if parsing the query
        parameters or the search itself fails.
    """
    try:
        # query parameters are untrusted input: parse them inside the try
        # block so that a malformed value is reported the same way as any
        # other failure of this endpoint instead of escaping the view
        offset = int(request.GET.get('offset', '0'))
        limit = int(request.GET.get('limit', '50'))
        regexp = bool(strtobool(request.GET.get('regexp', 'false')))
        with_visit = bool(strtobool(request.GET.get('with_visit', 'false')))

        # NOTE(review): '///' is presumably how the client encodes a
        # backslash inside the url path -- confirm against the caller
        url_pattern = url_pattern.replace('///', '\\')

        results = service.search_origin(url_pattern, offset, limit,
                                        regexp, with_visit)
        results = json.dumps(list(results), sort_keys=True, indent=4,
                             separators=(',', ': '))
    except Exception as exc:
        return handle_view_exception(request, exc, html_response=False)

    return HttpResponse(results, content_type='application/json')
@browse_route(r'origin/coverage_count/',
              view_name='browse-origin-coverage-count')
@never_cache
def _origin_coverage_count(request):
    """Internal browse endpoint to count the number of origins associated
    to each code provider declared in the archive coverage list.

    Counting is slow, so each provider count is stored in the ``db_cache``
    cache for one day and served from there. While a count is being
    computed, a marker value is stored in its place (for ten minutes) so
    that concurrent requests do not send the same count query again; those
    requests get the previously computed count when one exists, ``-1``
    otherwise.

    Args:
        request: the incoming Django request

    Returns:
        A JSON response holding, for each code provider, its id, its
        origin count and its origin types, or the response produced by
        ``handle_view_exception`` on failure.
    """
    # marker values used in place of a real origin count
    not_cached = -2      # no cache entry, or the entry has expired
    in_progress = -1     # a count query is currently running

    try:
        db_cache = caches['db_cache']
        counts = []
        for provider in code_providers:
            provider_id = provider['provider_id']
            url_regexp = provider['origin_url_regexp']
            count_key = '%s_origins_count' % provider_id
            prev_count_key = '%s_origins_prev_count' % provider_id

            count = db_cache.get(count_key, not_cached)
            if count == not_cached:
                # flag the count as being processed before running the
                # long query
                db_cache.set(count_key, in_progress, timeout=10*60)
                count = service.storage.origin_count(url_regexp,
                                                     regexp=True)
                # store the fresh count for a day and also keep it,
                # without expiration, as the fallback value
                db_cache.set(count_key, count, timeout=24*60*60)
                db_cache.set(prev_count_key, count, timeout=None)
            elif count == in_progress:
                # serve the previous count if there is one
                count = db_cache.get(prev_count_key, in_progress)

            counts.append({
                'provider_id': provider_id,
                'origin_count': count,
                'origin_types': provider['origin_types']
            })
        payload = json.dumps(counts)
    except Exception as exc:
        return handle_view_exception(request, exc, html_response=False)

    return HttpResponse(payload, content_type='application/json')
@browse_route(r'origin/(?P<origin_id>[0-9]+)/latest_snapshot/',
              view_name='browse-origin-latest-snapshot')
def _origin_latest_snapshot(request, origin_id):
    """
    Internal browse endpoint used to check if an origin has already
    been visited by Software Heritage and has at least one visit whose
    status is either full or partial.

    Args:
        request: the incoming Django request
        origin_id: the numeric origin id, captured from the route

    Returns:
        A JSON response holding the result of the latest snapshot lookup,
        or the response produced by ``handle_view_exception`` if the
        lookup or its serialization fails.
    """
    try:
        result = service.lookup_latest_origin_snapshot(
            int(origin_id), allowed_statuses=['full', 'partial'])
        result = json.dumps(result, sort_keys=True, indent=4,
                            separators=(',', ': '))
    except Exception as exc:
        # report failures the same way as the other JSON endpoints of
        # this module instead of letting the exception escape the view
        return handle_view_exception(request, exc, html_response=False)

    return HttpResponse(result, content_type='application/json')
@browse_route(r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)/',
              r'origin/(?P<origin_url>.+)/',
              view_name='browse-origin')
def origin_browse(request, origin_url, origin_type=None):
    """Django view that redirects to the ``browse-origin-directory`` view
    of a software origin, without any visit timestamp.

    Args:
        request: the incoming Django request
        origin_url: url of the origin, captured from the route
        origin_type: optional origin type, captured from the route

    Returns:
        A redirect response targeting the directory view url built for
        the origin.
    """
    directory_url_args = {
        'origin_type': origin_type,
        'origin_url': origin_url,
    }
    return redirect(reverse('browse-origin-directory',
                            url_args=directory_url_args))

File Metadata

Mime Type
text/x-python
Expires
Fri, Jul 4, 12:43 PM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3350476

Event Timeline