diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py index 594a7943..5bec10a4 100644 --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -1,484 +1,483 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from distutils.util import strtobool from functools import partial from swh.web.common import service from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import reverse from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.utils import enrich_origin, enrich_origin_visit from swh.web.api.views.utils import api_lookup DOC_RETURN_ORIGIN = ''' :>json string origin_visits_url: link to in order to get information about the visits for that origin :>json string url: the origin canonical url - :>json string type: the type of software origin (deprecated value; - types are now associated to visits instead of origins) - :>json number id: the origin unique identifier (deprecated value; - you should only refer to origins based on their URL) ''' DOC_RETURN_ORIGIN_ARRAY = \ DOC_RETURN_ORIGIN.replace(':>json', ':>jsonarr') DOC_RETURN_ORIGIN_VISIT = ''' :>json string date: ISO representation of the visit date (in UTC) :>json str origin: the origin canonical url :>json string origin_url: link to get information about the origin :>jsonarr string snapshot: the snapshot identifier of the visit (may be null if status is not **full**). :>jsonarr string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/` in order to get information about the snapshot of the visit (may be null if status is not **full**). :>json string status: status of the visit (either **full**, **partial** or **ongoing**) :>json number visit: the unique identifier of the visit ''' DOC_RETURN_ORIGIN_VISIT_ARRAY = \ DOC_RETURN_ORIGIN_VISIT.replace(':>json', ':>jsonarr') DOC_RETURN_ORIGIN_VISIT_ARRAY += ''' :>jsonarr number id: the unique identifier of the origin :>jsonarr string origin_visit_url: link to :http:get:`/api/1/origin/(origin_url)/visit/(visit_id)/` in order to get information about the visit ''' @api_route(r'/origins/', 'api-1-origins') @api_doc('/origins/', noargs=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origins(request): """ .. http:get:: /api/1/origins/ Get list of archived software origins. .. warning:: This endpoint used to provide an `origin_from` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. :query int origin_count: The maximum number of origins to return (default to 100, can not exceed 10000) {return_origin_array} {common_headers} {resheader_link} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origins?origin_count=500` """ origin_from = int(request.query_params.get('origin_from', '1')) origin_count = int(request.query_params.get('origin_count', '100')) origin_count = min(origin_count, 10000) results = api_lookup( service.lookup_origins, origin_from, origin_count+1, enrich_fn=enrich_origin, request=request) response = {'results': results, 'headers': {}} if len(results) > origin_count: origin_from = results.pop()['id'] response['headers']['link-next'] = reverse( 'api-1-origins', query_params={'origin_from': origin_from, 'origin_count': origin_count}, request=request) + for result in results: + if 'id' in result: + del result['id'] return response @api_route(r'/origin/(?P.+)/get/', 'api-1-origin') @api_doc('/origin/') @format_docstring(return_origin=DOC_RETURN_ORIGIN) def api_origin(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/get/ Get information about a software origin. :param string origin_url: the origin url {return_origin} {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/get/` """ ori_dict = { 'url': origin_url } error_msg = 'Origin with url %s not found.' % ori_dict['url'] return api_lookup( service.lookup_origin, ori_dict, notfound_msg=error_msg, enrich_fn=enrich_origin, request=request) @api_route(r'/origin/search/(?P.+)/', 'api-1-origin-search', throttle_scope='swh_api_origin_search') @api_doc('/origin/search/') @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_search(request, url_pattern): """ .. http:get:: /api/1/origin/search/(url_pattern)/ Search for software origins whose urls contain a provided string pattern or match a provided regular expression. The search is performed in a case insensitive way. .. warning:: This endpoint used to provide an `offset` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. :param string url_pattern: a string pattern :query int limit: the maximum number of found origins to return (bounded to 1000) :query boolean with_visit: if true, only return origins with at least one visit by Software heritage {return_origin_array} {common_headers} {resheader_link} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origin/search/python/?limit=2` """ result = {} limit = min(int(request.query_params.get('limit', '70')), 1000) page_token = request.query_params.get('page_token') with_visit = request.query_params.get('with_visit', 'false') (results, page_token) = api_lookup( service.search_origin, url_pattern, limit, bool(strtobool(with_visit)), page_token, enrich_fn=enrich_origin, request=request) if page_token is not None: query_params = {} query_params['limit'] = limit query_params['page_token'] = page_token result['headers'] = { 'link-next': reverse('api-1-origin-search', url_args={'url_pattern': url_pattern}, query_params=query_params, request=request) } result.update({ 'results': results }) return result @api_route(r'/origin/metadata-search/', 'api-1-origin-metadata-search') @api_doc('/origin/metadata-search/', noargs=True, need_params=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_metadata_search(request): """ .. http:get:: /api/1/origin/metadata-search/ Search for software origins whose metadata (expressed as a JSON-LD/CodeMeta dictionary) match the provided criteria. For now, only full-text search on this dictionary is supported. :query str fulltext: a string that will be matched against origin metadata; results are ranked and ordered starting with the best ones. :query int limit: the maximum number of found origins to return (bounded to 100) {return_origin_array} {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe` """ fulltext = request.query_params.get('fulltext', None) limit = min(int(request.query_params.get('limit', '70')), 100) if not fulltext: content = '"fulltext" must be provided and non-empty.' raise BadInputExc(content) results = api_lookup(service.search_origin_metadata, fulltext, limit, request=request) return { 'results': results, } @api_route(r'/origin/(?P.*)/visits/', 'api-1-origin-visits') @api_doc('/origin/visits/') @format_docstring( return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY) def api_origin_visits(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/visits/ Get information about all visits of a software origin. Visits are returned sorted in descending order according to their date. :param str origin_url: a software origin URL :query int per_page: specify the number of visits to list, for pagination purposes :query int last_visit: visit to start listing from, for pagination purposes {common_headers} {resheader_link} {return_origin_visit_array} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visits/` """ result = {} origin_query = {'url': origin_url} notfound_msg = 'No origin {} found'.format(origin_url) url_args_next = {'origin_url': origin_url} per_page = int(request.query_params.get('per_page', '10')) last_visit = request.query_params.get('last_visit') if last_visit: last_visit = int(last_visit) def _lookup_origin_visits( origin_query, last_visit=last_visit, per_page=per_page): all_visits = get_origin_visits(origin_query) all_visits.reverse() visits = [] if not last_visit: visits = all_visits[:per_page] else: for i, v in enumerate(all_visits): if v['visit'] == last_visit: visits = all_visits[i+1:i+1+per_page] break for v in visits: yield v results = api_lookup(_lookup_origin_visits, origin_query, notfound_msg=notfound_msg, enrich_fn=partial(enrich_origin_visit, with_origin_link=False, with_origin_visit_link=True), request=request) if results: nb_results = len(results) if nb_results == per_page: new_last_visit = results[-1]['visit'] query_params = {} query_params['last_visit'] = new_last_visit if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-1-origin-visits', url_args=url_args_next, query_params=query_params, request=request) } result.update({ 'results': results }) return result @api_route(r'/origin/(?P.*)/visit/latest/', 'api-1-origin-visit-latest', throttle_scope='swh_api_origin_visit_latest') @api_doc('/origin/visit/latest/') @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit_latest(request, origin_url=None): """ .. http:get:: /api/1/origin/(origin_url)/visit/latest/ Get information about the latest visit of a software origin. :param str origin_url: a software origin URL :query boolean require_snapshot: if true, only return a visit with a snapshot {common_headers} {return_origin_visit} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin or visit can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visit/latest/` """ require_snapshot = request.query_params.get('require_snapshot', 'false') return api_lookup( service.lookup_origin_visit_latest, origin_url, bool(strtobool(require_snapshot)), notfound_msg=('No visit for origin {} found' .format(origin_url)), enrich_fn=partial(enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False), request=request) @api_route(r'/origin/(?P.*)/visit/(?P[0-9]+)/', 'api-1-origin-visit') @api_doc('/origin/visit/') @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit(request, visit_id, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/visit/(visit_id)/ Get information about a specific visit of a software origin. :param str origin_url: a software origin URL :param int visit_id: a visit identifier {common_headers} {return_origin_visit} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin or visit can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visit/1/` """ return api_lookup( service.lookup_origin_visit, origin_url, int(visit_id), notfound_msg=('No visit {} for origin {} found' .format(visit_id, origin_url)), enrich_fn=partial(enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False), request=request) @api_route(r'/origin/(?P.+)' '/intrinsic-metadata', 'api-origin-intrinsic-metadata') @api_doc('/origin/intrinsic-metadata/') @format_docstring() def api_origin_intrinsic_metadata(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/intrinsic-metadata Get intrinsic metadata of a software origin (as a JSON-LD/CodeMeta dictionary). :param string origin_url: the origin url :>json string ???: intrinsic metadata field of the origin {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/intrinsic-metadata` """ # noqa ori_dict = { 'url': origin_url } error_msg = 'Origin with url %s not found' % ori_dict['url'] return api_lookup( service.lookup_origin_intrinsic_metadata, ori_dict, notfound_msg=error_msg, enrich_fn=enrich_origin, request=request) diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py index defc47a5..76de4b0b 100644 --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -1,176 +1,176 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render, redirect from swh.web.common import service from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import ( reverse, format_utc_iso_date, parse_timestamp ) from swh.web.common.exc import handle_view_exception from swh.web.browse.utils import get_snapshot_context from swh.web.browse.browseurls import browse_route from .utils.snapshot_context import ( browse_snapshot_directory, browse_snapshot_content, browse_snapshot_log, browse_snapshot_branches, browse_snapshot_releases ) @browse_route(r'origin/(?P.+)/visit/(?P.+)/directory/', r'origin/(?P.+)/visit/(?P.+)' '/directory/(?P.+)/', r'origin/(?P.+)/directory/', r'origin/(?P.+)/directory/(?P.+)/', view_name='browse-origin-directory') def origin_directory_browse(request, origin_url, timestamp=None, path=None): """Django view for browsing the content of a directory associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/(origin_url)/directory/[(path)/]` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/directory/[(path)/]` """ # noqa return browse_snapshot_directory(request, origin_url=origin_url, timestamp=timestamp, path=path) @browse_route(r'origin/(?P.+)/visit/(?P.+)' '/content/(?P.+)/', r'origin/(?P.+)/content/(?P.+)/', view_name='browse-origin-content') def origin_content_browse(request, origin_url, path=None, timestamp=None): """Django view that produces an HTML display of a content associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/(origin_url)/content/(path)/` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/content/(path)/` """ # noqa language = request.GET.get('language', None) return browse_snapshot_content(request, origin_url=origin_url, timestamp=timestamp, path=path, selected_language=language) PER_PAGE = 20 @browse_route(r'origin/(?P.+)/visit/(?P.+)/log/', r'origin/(?P.+)/log/', view_name='browse-origin-log') def origin_log_browse(request, origin_url, timestamp=None): """Django view that produces an HTML display of revisions history (aka the commit log) associated to a software origin. The url scheme that points to it is the following: * :http:get:`/browse/origin/(origin_url)/log/` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/log/` """ # noqa return browse_snapshot_log(request, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P.+)/visit/(?P.+)/branches/', r'origin/(?P.+)/branches/', view_name='browse-origin-branches') def origin_branches_browse(request, origin_url, timestamp=None): """Django view that produces an HTML display of the list of branches associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/(origin_url)/branches/` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/branches/` """ # noqa return browse_snapshot_branches(request, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P.+)/visit/(?P.+)/releases/', r'origin/(?P.+)/releases/', view_name='browse-origin-releases') def origin_releases_browse(request, origin_url, timestamp=None): """Django view that produces an HTML display of the list of releases associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/(origin_url)/releases/` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/releases/` """ # noqa return browse_snapshot_releases(request, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P.+)/visits/', view_name='browse-origin-visits') def origin_visits_browse(request, origin_url): """Django view that produces an HTML display of visits reporting - for a swh origin identified by its id or its url. + for a given origin. The url that points to it is :http:get:`/browse/origin/(origin_url)/visits/`. """ try: origin_info = service.lookup_origin({'url': origin_url}) origin_visits = get_origin_visits(origin_info) snapshot_context = get_snapshot_context(origin_url=origin_url) except Exception as exc: return handle_view_exception(request, exc) for i, visit in enumerate(origin_visits): url_date = format_utc_iso_date(visit['date'], '%Y-%m-%dT%H:%M:%SZ') visit['fmt_date'] = format_utc_iso_date(visit['date']) query_params = {} if i < len(origin_visits) - 1: if visit['date'] == origin_visits[i+1]['date']: query_params = {'visit_id': visit['visit']} if i > 0: if visit['date'] == origin_visits[i-1]['date']: query_params = {'visit_id': visit['visit']} snapshot = visit['snapshot'] if visit['snapshot'] else '' visit['browse_url'] = reverse('browse-origin-directory', url_args={'origin_url': origin_url, 'timestamp': url_date}, query_params=query_params) if not snapshot: visit['snapshot'] = '' visit['date'] = parse_timestamp(visit['date']).timestamp() heading = 'Origin visits - %s' % origin_url return render(request, 'browse/origin-visits.html', {'heading': heading, 'swh_object_name': 'Visits', 'swh_object_metadata': origin_info, 'origin_visits': origin_visits, 'origin_info': origin_info, 'snapshot_context': snapshot_context, 'vault_cooking': None, 'show_actions_menu': False}) @browse_route(r'origin/(?P.+)/', view_name='browse-origin') def origin_browse(request, origin_url): """Django view that redirects to the display of the latest archived snapshot for a given software origin. """ last_snapshot_url = reverse('browse-origin-directory', url_args={'origin_url': origin_url}) return redirect(last_snapshot_url) diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py index b3628ef4..bceb4560 100644 --- a/swh/web/common/origin_visits.py +++ b/swh/web/common/origin_visits.py @@ -1,180 +1,159 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import math from django.core.cache import cache from swh.web.common.exc import NotFoundExc from swh.web.common.utils import parse_timestamp def get_origin_visits(origin_info): """Function that returns the list of visits for a swh origin. That list is put in cache in order to speedup the navigation in the swh web browse ui. Args: origin_info (dict): dict describing the origin to fetch visits from Returns: list: A list of dict describing the origin visits with the following keys: * **date**: UTC visit date in ISO format, - * **origin**: the origin id + * **origin**: the origin url * **status**: the visit status, either **full**, **partial** or **ongoing** * **visit**: the visit id + * **type**: the visit type Raises: NotFoundExc: if the origin is not found """ from swh.web.common import service if 'url' in origin_info: origin_url = origin_info['url'] else: origin_url = service.lookup_origin(origin_info)['url'] cache_entry_id = 'origin_visits_%s' % origin_url cache_entry = cache.get(cache_entry_id) if cache_entry: last_visit = cache_entry[-1]['visit'] new_visits = list(service.lookup_origin_visits(origin_url, last_visit=last_visit)) if not new_visits: last_snp = service.lookup_latest_origin_snapshot(origin_url) if not last_snp or last_snp['id'] == cache_entry[-1]['snapshot']: return cache_entry origin_visits = [] per_page = service.MAX_LIMIT last_visit = None while 1: visits = list(service.lookup_origin_visits(origin_url, last_visit=last_visit, per_page=per_page)) origin_visits += visits if len(visits) < per_page: break else: if not last_visit: last_visit = per_page else: last_visit += per_page def _visit_sort_key(visit): ts = parse_timestamp(visit['date']).timestamp() return ts + (float(visit['visit']) / 10e3) for v in origin_visits: if 'metadata' in v: del v['metadata'] origin_visits = [dict(t) for t in set([tuple(d.items()) for d in origin_visits])] origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) cache.set(cache_entry_id, origin_visits) return origin_visits def get_origin_visit(origin_info, visit_ts=None, visit_id=None, snapshot_id=None): """Function that returns information about a visit for a given origin. The visit is retrieved from a provided timestamp. The closest visit from that timestamp is selected. Args: origin_info (dict): a dict filled with origin information - (id, url, type) visit_ts (int or str): an ISO date string or Unix timestamp to parse Returns: A dict containing the visit info as described below:: - {'origin': 2, + {'origin': 'https://forge.softwareheritage.org/source/swh-web/', 'date': '2017-10-08T11:54:25.582463+00:00', 'metadata': {}, 'visit': 25, 'status': 'full'} """ visits = get_origin_visits(origin_info) if not visits: - if 'url' in origin_info: - message = ('No visit associated to origin with' - ' url %s!' % origin_info['url']) - else: - message = ('No visit associated to origin with' - ' id %s!' % origin_info['id']) - raise NotFoundExc(message) + raise NotFoundExc(('No visit associated to origin with' + ' url %s!' % origin_info['url'])) if snapshot_id: visit = [v for v in visits if v['snapshot'] == snapshot_id] if len(visit) == 0: - if 'type' in origin_info and 'url' in origin_info: - message = ('Visit for snapshot with id %s for origin with type' - ' url %s not found!' % - (snapshot_id, origin_info['url'])) - else: - message = ('Visit for snapshot with id %s for origin with' - ' id %s not found!' % - (snapshot_id, origin_info['id'])) - raise NotFoundExc(message) + raise NotFoundExc(('Visit for snapshot with id %s for origin with' + ' url %s not found!' % + (snapshot_id, origin_info['url']))) return visit[0] if visit_id: visit = [v for v in visits if v['visit'] == int(visit_id)] if len(visit) == 0: - if 'type' in origin_info and 'url' in origin_info: - message = ('Visit with id %s for origin with' - ' and url %s not found!' % - (visit_id, origin_info['url'])) - else: - message = ('Visit with id %s for origin with id %s' - ' not found!' % (visit_id, origin_info['id'])) - raise NotFoundExc(message) + raise NotFoundExc(('Visit with id %s for origin with' + ' url %s not found!' % + (visit_id, origin_info['url']))) return visit[0] if not visit_ts: # returns the latest full visit when no timestamp is provided for v in reversed(visits): if v['status'] == 'full': return v return visits[-1] target_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) # Find the visit with date closest to the target (in absolute value) (abs_time_delta, visit_idx) = min( ((math.floor(parse_timestamp(visit['date']).timestamp()), i) for (i, visit) in enumerate(visits)), key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts)) if visit_idx is not None: visit = visits[visit_idx] # If multiple visits have the same date, select the one with # the largest id. while visit_idx < len(visits) - 1 and \ visit['date'] == visits[visit_idx+1]['date']: visit_idx = visit_idx + 1 visit = visits[visit_idx] return visit else: - if 'type' in origin_info and 'url' in origin_info: - message = ('Visit with timestamp %s for origin with ' - 'and url %s not found!' % - (visit_ts, origin_info['url'])) - else: - message = ('Visit with timestamp %s for origin with id %s ' - 'not found!' % (visit_ts, origin_info['id'])) - raise NotFoundExc(message) + raise NotFoundExc(('Visit with timestamp %s for origin with ' + 'url %s not found!' % + (visit_ts, origin_info['url']))) diff --git a/swh/web/tests/common/test_origin_visits.py b/swh/web/tests/common/test_origin_visits.py index 89b64ba7..56bb1fdd 100644 --- a/swh/web/tests/common/test_origin_visits.py +++ b/swh/web/tests/common/test_origin_visits.py @@ -1,133 +1,136 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.web.common.exc import NotFoundExc from swh.web.common.origin_visits import ( get_origin_visits, get_origin_visit ) def test_get_origin_visits(mocker): mock_service = mocker.patch('swh.web.common.service') mock_service.MAX_LIMIT = 2 def _lookup_origin_visits(*args, **kwargs): if kwargs['last_visit'] is None: return [ { 'visit': 1, 'date': '2017-05-06T00:59:10+00:00', 'metadata': {} }, { 'visit': 2, 'date': '2017-08-06T00:59:10+00:00', 'metadata': {} } ] else: return [ { 'visit': 3, 'date': '2017-09-06T00:59:10+00:00', 'metadata': {} } ] mock_service.lookup_origin_visits.side_effect = _lookup_origin_visits origin_info = { 'id': 1, - 'type': 'git', + 'url': 'https://github.com/foo/bar', } origin_visits = get_origin_visits(origin_info) assert len(origin_visits) == 3 def test_get_origin_visit(mocker): mock_origin_visits = mocker.patch( 'swh.web.common.origin_visits.get_origin_visits') origin_info = { - 'id': 2, - 'type': 'git', 'url': 'https://github.com/foo/bar', } visits = [ { 'status': 'full', 'date': '2015-07-09T21:09:24+00:00', 'visit': 1, - 'origin': origin_info['id'] + 'origin': 'https://github.com/foo/bar', + 'type': 'git', }, { 'status': 'full', 'date': '2016-02-23T18:05:23.312045+00:00', 'visit': 2, - 'origin': origin_info['id'] + 'origin': 'https://github.com/foo/bar', + 'type': 'git', }, { 'status': 'full', 'date': '2016-03-28T01:35:06.554111+00:00', 'visit': 3, - 'origin': origin_info['id'] + 'origin': 'https://github.com/foo/bar', + 'type': 'git', }, { 'status': 'full', 'date': '2016-06-18T01:22:24.808485+00:00', 'visit': 4, - 'origin': origin_info['id'] + 'origin': 'https://github.com/foo/bar', + 'type': 'git', }, { 'status': 'full', 'date': '2016-08-14T12:10:00.536702+00:00', 'visit': 5, - 'origin': origin_info['id'] + 'origin': 'https://github.com/foo/bar', + 'type': 'git', } ] mock_origin_visits.return_value = visits visit_id = 12 with pytest.raises(NotFoundExc) as e: visit = get_origin_visit(origin_info, visit_id=visit_id) assert e.match('Visit with id %s' % visit_id) assert e.match('url %s' % origin_info['url']) visit = get_origin_visit(origin_info, visit_id=2) assert visit == visits[1] visit = get_origin_visit( origin_info, visit_ts='2016-02-23T18:05:23.312045+00:00') assert visit == visits[1] visit = get_origin_visit( origin_info, visit_ts='2016-02-20') assert visit == visits[1] visit = get_origin_visit( origin_info, visit_ts='2016-06-18T01:22') assert visit == visits[3] visit = get_origin_visit( origin_info, visit_ts='2016-06-18 01:22') assert visit == visits[3] visit = get_origin_visit( origin_info, visit_ts=1466208000) assert visit == visits[3] visit = get_origin_visit( origin_info, visit_ts='2014-01-01') assert visit == visits[0] visit = get_origin_visit( origin_info, visit_ts='2018-01-01') assert visit == visits[-1]