diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
index 71c179e4..cb3c0625 100644
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -1,383 +1,382 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from distutils.util import strtobool

from swh.web.common import service
from swh.web.common.exc import BadInputExc
-from swh.web.common.utils import (
-    reverse, get_origin_visits
-)
+from swh.web.common.origin_visits import get_origin_visits
+from swh.web.common.utils import reverse
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup


def _enrich_origin(origin):
    if 'id' in origin:
        o = origin.copy()
        o['origin_visits_url'] = \
            reverse('api-origin-visits', url_args={'origin_id': origin['id']})
        return o
    return origin


@api_route(r'/origin/(?P<origin_id>[0-9]+)/', 'api-origin')
@api_route(r'/origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)/',
           'api-origin')
@api_doc('/origin/')
def api_origin(request, origin_id=None, origin_type=None, origin_url=None):
    """
    .. http:get:: /api/1/origin/(origin_id)/

        Get information about a software origin.

        :param int origin_id: a software origin identifier

        :>json number id: the origin unique identifier
        :>json string origin_visits_url: link to
            :http:get:`/api/1/origin/(origin_id)/visits/` in order to get
            information about the visits for that origin
        :>json string type: the type of software origin (possible values
            are ``git``, ``svn``, ``hg``, ``deb``, ``pypi``, ``ftp`` or
            ``deposit``)
        :>json string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1/`

    .. http:get:: /api/1/origin/(origin_type)/url/(origin_url)/

        Get information about a software origin.

        :param string origin_type: the origin type (possible values are
            ``git``, ``svn``, ``hg``, ``deb``, ``pypi``, ``ftp`` or
            ``deposit``)
        :param string origin_url: the origin url

        :>json number id: the origin unique identifier
        :>json string origin_visits_url: link to
            :http:get:`/api/1/origin/(origin_id)/visits/` in order to get
            information about the visits for that origin
        :>json string type: the type of software origin
        :>json string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/git/url/https://github.com/python/cpython/`
    """ # noqa
    ori_dict = {
        'id': origin_id,
        'type': origin_type,
        'url': origin_url
    }
    ori_dict = {k: v for k, v in ori_dict.items() if ori_dict[k]}
    if 'id' in ori_dict:
        error_msg = 'Origin with id %s not found.' % ori_dict['id']
    else:
        error_msg = 'Origin with type %s and URL %s not found.' % (
            ori_dict['type'], ori_dict['url'])

    return api_lookup(
        service.lookup_origin, ori_dict,
        notfound_msg=error_msg,
        enrich_fn=_enrich_origin)
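A minimal client-side sketch (not part of the patch) of the two lookup flavors routed above; the public API base URL is an assumption:

import requests

API = 'https://archive.softwareheritage.org/api/1'  # assumed base URL

def lookup_origin(origin_id=None, origin_type=None, origin_url=None):
    # Mirrors the two routes: /origin/<id>/ and /origin/<type>/url/<url>/
    if origin_id is not None:
        url = '%s/origin/%d/' % (API, origin_id)
    else:
        url = '%s/origin/%s/url/%s/' % (API, origin_type, origin_url)
    r = requests.get(url)
    r.raise_for_status()
    return r.json()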
@api_route(r'/origin/search/(?P<url_pattern>.+)/',
           'api-origin-search')
@api_doc('/origin/search/')
def api_origin_search(request, url_pattern):
    """
    .. http:get:: /api/1/origin/search/(url_pattern)/

        Search for software origins whose urls contain a provided string
        pattern or match a provided regular expression.
        The search is performed in a case insensitive way.

        :param string url_pattern: a string pattern or a regular expression

        :query int offset: the number of found origins to skip before
            returning results
        :query int limit: the maximum number of found origins to return
        :query boolean regexp: if true, consider provided pattern as a
            regular expression and search origins whose urls match it
        :query boolean with_visit: if true, only return origins with at
            least one visit by Software Heritage

        :>jsonarr number id: the origin unique identifier
        :>jsonarr string origin_visits_url: link to
            :http:get:`/api/1/origin/(origin_id)/visits/` in order to get
            information about the visits for that origin
        :>jsonarr string type: the type of software origin
        :>jsonarr string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/search/python/?limit=2`
    """ # noqa
    result = {}
    offset = int(request.query_params.get('offset', '0'))
    limit = int(request.query_params.get('limit', '70'))
    regexp = request.query_params.get('regexp', 'false')
    with_visit = request.query_params.get('with_visit', 'false')

    results = api_lookup(service.search_origin, url_pattern, offset, limit,
                         bool(strtobool(regexp)),
                         bool(strtobool(with_visit)),
                         enrich_fn=_enrich_origin)

    nb_results = len(results)
    if nb_results == limit:
        query_params = {}
        query_params['offset'] = offset + limit
        query_params['limit'] = limit
        query_params['regexp'] = regexp

        result['headers'] = {
            'link-next': reverse('api-origin-search',
                                 url_args={'url_pattern': url_pattern},
                                 query_params=query_params)
        }

    result.update({
        'results': results
    })

    return result
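A sketch (not part of the patch) of walking all result pages; it assumes the 'link-next' entry above is emitted as a standard HTTP Link header with rel="next", which requests parses for us:

import requests

def iter_origin_search(url_pattern, limit=70):
    url = ('https://archive.softwareheritage.org/api/1/origin/search/%s/'
           '?limit=%d' % (url_pattern, limit))
    while url:
        r = requests.get(url)
        r.raise_for_status()
        yield from r.json()
        # requests exposes parsed Link headers via r.links
        url = r.links.get('next', {}).get('url')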
@api_route(r'/origin/metadata-search/',
           'api-origin-metadata-search')
@api_doc('/origin/metadata-search/', noargs=True)
def api_origin_metadata_search(request):
    """
    .. http:get:: /api/1/origin/metadata-search/

        Search for software origins whose metadata (expressed as a
        JSON-LD/CodeMeta dictionary) match the provided criteria.
        For now, only full-text search on this dictionary is supported.

        :query str fulltext: a string that will be matched against origin
            metadata; results are ranked and ordered starting with the best
            ones.
        :query int limit: the maximum number of found origins to return
            (bounded to 100)

        :>jsonarr number origin_id: the origin unique identifier
        :>jsonarr dict metadata: metadata of the origin (as a
            JSON-LD/CodeMeta dictionary)
        :>jsonarr string from_revision: the revision used to extract these
            metadata (the current HEAD or one of the former HEADs)
        :>jsonarr dict tool: the tool used to extract these metadata

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`
    """ # noqa
    fulltext = request.query_params.get('fulltext', None)
    limit = min(int(request.query_params.get('limit', '70')), 100)

    if not fulltext:
        content = '"fulltext" must be provided and non-empty.'
        raise BadInputExc(content)

    results = api_lookup(service.search_origin_metadata, fulltext, limit)

    return {
        'results': results,
    }
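A small usage sketch (assumed base URL); 'fulltext' is mandatory and 'limit' is capped server-side at 100, as documented above:

import requests

def search_origin_metadata(fulltext, limit=70):
    r = requests.get('https://archive.softwareheritage.org/api/1'
                     '/origin/metadata-search/',
                     params={'fulltext': fulltext, 'limit': limit})
    r.raise_for_status()
    return r.json()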
@api_route(r'/origin/(?P<origin_id>[0-9]+)/visits/', 'api-origin-visits')
@api_doc('/origin/visits/')
def api_origin_visits(request, origin_id):
    """
    .. http:get:: /api/1/origin/(origin_id)/visits/

        Get information about all visits of a software origin.
        Visits are returned sorted in descending order according
        to their date.

        :param int origin_id: a software origin identifier

        :query int per_page: specify the number of visits to list, for
            pagination purposes
        :query int last_visit: visit to start listing from, for pagination
            purposes

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request
        :resheader Link: indicates that a subsequent result page is
            available and contains the url pointing to it

        :>jsonarr string date: ISO representation of the visit date (in UTC)
        :>jsonarr number id: the unique identifier of the origin
        :>jsonarr string origin_visit_url: link to
            :http:get:`/api/1/origin/(origin_id)/visit/(visit_id)/`
            in order to get information about the visit
        :>jsonarr string snapshot: the snapshot identifier of the visit
        :>jsonarr string snapshot_url: link to
            :http:get:`/api/1/snapshot/(snapshot_id)/` in order to get
            information about the snapshot of the visit
        :>jsonarr string status: status of the visit (either **full**,
            **partial** or **ongoing**)
        :>jsonarr number visit: the unique identifier of the visit

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1/visits/`
    """ # noqa
    result = {}
    per_page = int(request.query_params.get('per_page', '10'))
    last_visit = request.query_params.get('last_visit')
    if last_visit:
        last_visit = int(last_visit)

    def _lookup_origin_visits(
            origin_id, last_visit=last_visit, per_page=per_page):
        all_visits = get_origin_visits({'id': origin_id})
        all_visits.reverse()
        visits = []
        if not last_visit:
            visits = all_visits[:per_page]
        else:
            for i, v in enumerate(all_visits):
                if v['visit'] == last_visit:
                    visits = all_visits[i+1:i+1+per_page]
                    break
        for v in visits:
            yield v

    def _enrich_origin_visit(origin_visit):
        ov = origin_visit.copy()
        ov['origin_visit_url'] = reverse('api-origin-visit',
                                         url_args={'origin_id': origin_id,
                                                   'visit_id': ov['visit']})
        snapshot = ov['snapshot']
        if snapshot:
            ov['snapshot_url'] = reverse('api-snapshot',
                                         url_args={'snapshot_id': snapshot})
        else:
            ov['snapshot_url'] = None
        return ov

    results = api_lookup(_lookup_origin_visits, origin_id,
                         notfound_msg='No origin {} found'.format(origin_id),
                         enrich_fn=_enrich_origin_visit)

    if results:
        nb_results = len(results)
        if nb_results == per_page:
            new_last_visit = results[-1]['visit']
            query_params = {}
            query_params['last_visit'] = new_last_visit

            if request.query_params.get('per_page'):
                query_params['per_page'] = per_page

            result['headers'] = {
                'link-next': reverse('api-origin-visits',
                                     url_args={'origin_id': origin_id},
                                     query_params=query_params)
            }

    result.update({
        'results': results
    })

    return result
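For clarity, the cursor-style pagination implemented by _lookup_origin_visits above, restated as a standalone sketch (visits listed most recent first, 'last_visit' acting as the cursor):

def paginate_visits(all_visits, last_visit=None, per_page=10):
    visits = list(reversed(all_visits))  # most recent first
    if last_visit is None:
        return visits[:per_page]
    for i, v in enumerate(visits):
        if v['visit'] == last_visit:
            # the page starts right after the visit carrying the cursor id
            return visits[i + 1:i + 1 + per_page]
    return []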
@api_route(r'/origin/(?P<origin_id>[0-9]+)/visit/(?P<visit_id>[0-9]+)/',
           'api-origin-visit')
@api_doc('/origin/visit/')
def api_origin_visit(request, origin_id, visit_id):
    """
    .. http:get:: /api/1/origin/(origin_id)/visit/(visit_id)/

        Get information about a specific visit of a software origin.

        :param int origin_id: a software origin identifier
        :param int visit_id: a visit identifier

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        :>json string date: ISO representation of the visit date (in UTC)
        :>json number origin: the origin unique identifier
        :>json string origin_url: link to :http:get:`/api/1/origin/(origin_id)/`
            in order to get information about the origin
        :>json string snapshot: the snapshot identifier of the visit
        :>json string snapshot_url: link to
            :http:get:`/api/1/snapshot/(snapshot_id)/` in order to get
            information about the snapshot of the visit
        :>json string status: status of the visit (either **full**,
            **partial** or **ongoing**)
        :>json number visit: the unique identifier of the visit

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin or visit can not be found in the
            archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1500/visit/1/`
    """ # noqa
    def _enrich_origin_visit(origin_visit):
        ov = origin_visit.copy()
        ov['origin_url'] = reverse('api-origin',
                                   url_args={'origin_id': ov['origin']})
        snapshot = ov['snapshot']
        if snapshot:
            ov['snapshot_url'] = reverse('api-snapshot',
                                         url_args={'snapshot_id': snapshot})
        else:
            ov['snapshot_url'] = None
        return ov

    return api_lookup(
        service.lookup_origin_visit, origin_id, visit_id,
        notfound_msg=('No visit {} for origin {} found'
                      .format(visit_id, origin_id)),
        enrich_fn=_enrich_origin_visit)
diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py
index 891c6074..c64a80bd 100644
--- a/swh/web/api/views/revision.py
+++ b/swh/web/api/views/revision.py
@@ -1,505 +1,501 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from django.http import HttpResponse

from swh.web.common import service
from swh.web.common.utils import reverse
from swh.web.common.utils import parse_timestamp

from swh.web.api import utils
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup


def _revision_directory_by(revision, path, request_path,
                           limit=100, with_data=False):
    """
    Compute the revision matching criterion's directory or content data.

    Args:
        revision: dictionary of criterions representing a revision to lookup
        path: directory's path to lookup
        request_path: request path which holds the original context to
        limit: optional query parameter to limit the revisions log
            (default to 100). For now, note that this limit could impede the
            transitivity conclusion about sha1_git not being an ancestor of
        with_data: indicate to retrieve the content's raw data if path
            resolves to a content.

    """
    def enrich_directory_local(dir, context_url=request_path):
        return utils.enrich_directory(dir, context_url)

    rev_id, result = service.lookup_directory_through_revision(
        revision, path, limit=limit, with_data=with_data)

    content = result['content']
    if result['type'] == 'dir':  # dir_entries
        result['content'] = list(map(enrich_directory_local, content))
    else:  # content
        result['content'] = utils.enrich_content(content)

    return result


@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)/log/',
           'api-revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/log/',
           'api-revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/ts/(?P<ts>.+)/log/',
           'api-revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)'
           r'/ts/(?P<ts>.+)/log/',
           'api-revision-origin-log')
@api_doc('/revision/origin/log/')
def api_revision_log_by(request, origin_id,
-                        branch_name='refs/heads/master',
+                        branch_name='HEAD',
                        ts=None):
    """
    .. http:get:: /api/1/revision/origin/(origin_id)[/branch/(branch_name)][/ts/(timestamp)]/log

        Show the commit log for a revision, searching for it based on
        software origin, branch name, and/or visit timestamp.

        This endpoint behaves like
        :http:get:`/api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/`,
        but operates on the revision that has been found at a given software
        origin, close to a given point in time, pointed by a given branch.

        :param int origin_id: a software origin identifier
        :param string branch_name: optional parameter specifying a
            fully-qualified branch name
-            associated to the software origin, e.g., "refs/heads/master".
-            Defaults to the master branch.
+            associated to the software origin, e.g., "refs/heads/master".
+            Defaults to the HEAD branch.
        :param string timestamp: optional parameter specifying a timestamp
            close to which the revision pointed by the given branch should
            be looked up. The timestamp can be expressed either as an ISO
            date or as a Unix one (in UTC). Defaults to now.

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        :>jsonarr object author: information about the author of the revision
        :>jsonarr string author_url: link to
            :http:get:`/api/1/person/(person_id)/` to get information about
            the author of the revision
        :>jsonarr object committer: information about the committer of the
            revision
        :>jsonarr string committer_url: link to
            :http:get:`/api/1/person/(person_id)/` to get information about
            the committer of the revision
        :>jsonarr string committer_date: ISO representation of the commit
            date (in UTC)
        :>jsonarr string date: ISO representation of the revision date
            (in UTC)
        :>jsonarr string directory: the unique identifier that revision
            points to
        :>jsonarr string directory_url: link to
            :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get
            information about the directory associated to the revision
        :>jsonarr string id: the revision unique identifier
        :>jsonarr boolean merge: whether or not the revision corresponds to
            a merge commit
        :>jsonarr string message: the message associated to the revision
        :>jsonarr array parents: the parents of the revision, i.e. the
            previous revisions that head directly to it, each entry of that
            array contains an unique parent revision identifier but also a
            link to :http:get:`/api/1/revision/(sha1_git)/` to get more
            information about it
        :>jsonarr string type: the type of the revision

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: no revision matching the given criteria could be
            found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`revision/origin/723566/ts/2016-01-17T00:00:00+00:00/log/`
    """ # noqa
    result = {}
    per_page = int(request.query_params.get('per_page', '10'))

-    if ts:
-        ts = parse_timestamp(ts)
-
    def lookup_revision_log_by_with_limit(o_id, br, ts, limit=per_page+1):
        return service.lookup_revision_log_by(o_id, br, ts, limit)

    error_msg = 'No revision matching origin %s ' % origin_id
    error_msg += ', branch name %s' % branch_name
    error_msg += (' and time stamp %s.' % ts) if ts else '.'

    rev_get = api_lookup(
        lookup_revision_log_by_with_limit, origin_id, branch_name, ts,
        notfound_msg=error_msg,
        enrich_fn=utils.enrich_revision)

    nb_rev = len(rev_get)
    if nb_rev == per_page+1:
        revisions = rev_get[:-1]
        last_sha1_git = rev_get[-1]['id']

        params = {k: v for k, v in {'origin_id': origin_id,
                                    'branch_name': branch_name,
                                    'ts': ts,
                                    }.items() if v is not None}

        query_params = {}
        query_params['sha1_git'] = last_sha1_git

        if request.query_params.get('per_page'):
            query_params['per_page'] = per_page

        result['headers'] = {
            'link-next': reverse('api-revision-origin-log', url_args=params,
                                 query_params=query_params)
        }

    else:
        revisions = rev_get

    result.update({'results': revisions})

    return result
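The look-ahead pagination used above, restated as a standalone sketch: fetch per_page + 1 entries; a full overflow means another page exists, and the extra entry's id seeds the link-next URL:

def paginate_log(fetch_log, per_page):
    # fetch_log(limit) returns at most 'limit' revisions
    rows = fetch_log(per_page + 1)
    if len(rows) == per_page + 1:
        return rows[:-1], rows[-1]['id']  # page, cursor for the next page
    return rows, None                     # last page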
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/directory/',
           'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/directory/(?P<path>.+)/',
           'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)/directory/',
           'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)/ts/(?P<ts>.+)/directory/',
           'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)/directory/(?P<path>.+)/',
           'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)/ts/(?P<ts>.+)'
           r'/directory/(?P<path>.+)/',
           'api-revision-origin-directory')
@api_doc('/revision/origin/directory/', tags=['hidden'])
def api_directory_through_revision_origin(request, origin_id,
                                          branch_name="refs/heads/master",
                                          ts=None,
                                          path=None,
                                          with_data=False):
    """
    Display directory or content information through a revision identified
    by origin/branch/timestamp.
    """
    if ts:
        ts = parse_timestamp(ts)

    return _revision_directory_by({'origin_id': origin_id,
                                   'branch_name': branch_name,
                                   'ts': ts
                                   },
                                  path, request.path,
                                  with_data=with_data)
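A client sketch for this hidden endpoint; the URL shape is inferred from the routes above and is an assumption:

import requests

def origin_directory(origin_id, branch_name, ts=None, path=None):
    url = ('https://archive.softwareheritage.org/api/1/revision/origin/%d'
           '/branch/%s' % (origin_id, branch_name))
    if ts:
        url += '/ts/%s' % ts
    url += '/directory/'
    if path:
        url += '%s/' % path
    r = requests.get(url)
    r.raise_for_status()
    return r.json()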
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/',
           'api-revision-origin')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)/',
           'api-revision-origin')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)/ts/(?P<ts>.+)/',
           'api-revision-origin')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/ts/(?P<ts>.+)/',
           'api-revision-origin')
@api_doc('/revision/origin/')
def api_revision_with_origin(request, origin_id,
-                             branch_name="refs/heads/master",
+                             branch_name='HEAD',
                             ts=None):
    """
    .. http:get:: /api/1/revision/origin/(origin_id)/[branch/(branch_name)/][ts/(timestamp)/]

        Get information about a revision, searching for it based on software
        origin, branch name, and/or visit timestamp.

        This endpoint behaves like :http:get:`/api/1/revision/(sha1_git)/`,
        but operates on the revision that has been found at a given software
        origin, close to a given point in time, pointed by a given branch.

        :param int origin_id: a software origin identifier
        :param string branch_name: optional parameter specifying a
            fully-qualified branch name
-            associated to the software origin, e.g., "refs/heads/master".
-            Defaults to the master branch.
+            associated to the software origin, e.g., "refs/heads/master".
+            Defaults to the HEAD branch.
        :param string timestamp: optional parameter specifying a timestamp
            close to which the revision pointed by the given branch should
            be looked up. The timestamp can be expressed either as an ISO
            date or as a Unix one (in UTC). Defaults to now.

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        :>json object author: information about the author of the revision
        :>json string author_url: link to
            :http:get:`/api/1/person/(person_id)/` to get information about
            the author of the revision
        :>json object committer: information about the committer of the
            revision
        :>json string committer_url: link to
            :http:get:`/api/1/person/(person_id)/` to get information about
            the committer of the revision
        :>json string committer_date: ISO representation of the commit date
            (in UTC)
        :>json string date: ISO representation of the revision date (in UTC)
        :>json string directory: the unique identifier that revision points
            to
        :>json string directory_url: link to
            :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get
            information about the directory associated to the revision
        :>json string id: the revision unique identifier
        :>json boolean merge: whether or not the revision corresponds to a
            merge commit
        :>json string message: the message associated to the revision
        :>json array parents: the parents of the revision, i.e. the previous
            revisions that head directly to it, each entry of that array
            contains an unique parent revision identifier but also a link to
            :http:get:`/api/1/revision/(sha1_git)/` to get more information
            about it
        :>json string type: the type of the revision

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: no revision matching the given criteria could be
            found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`revision/origin/13706355/branch/refs/heads/2.7/`
    """ # noqa
-    ts = parse_timestamp(ts)
    return api_lookup(
        service.lookup_revision_by, origin_id, branch_name, ts,
        notfound_msg=('Revision with (origin_id: {}, branch_name: {}'
                      ', ts: {}) not found.'.format(origin_id,
                                                    branch_name, ts)),
        enrich_fn=utils.enrich_revision)


@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/prev/(?P<context>[0-9a-f/]+)/',
           'api-revision-context')
@api_doc('/revision/prev/', tags=['hidden'])
def api_revision_with_context(request, sha1_git, context):
    """
    Return information about revision with id sha1_git.
    """
    def _enrich_revision(revision, context=context):
        return utils.enrich_revision(revision, context)

    return api_lookup(
        service.lookup_revision, sha1_git,
        notfound_msg='Revision with sha1_git %s not found.' % sha1_git,
        enrich_fn=_enrich_revision)


@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/', 'api-revision')
@api_doc('/revision/')
def api_revision(request, sha1_git):
    """
    .. http:get:: /api/1/revision/(sha1_git)/

        Get information about a revision in the archive.
        Revisions are identified by **sha1** checksums, compatible with Git
        commit identifiers.
        See :func:`swh.model.identifiers.revision_identifier` in our data
        model module for details about how they are computed.

        :param string sha1_git: hexadecimal representation of the revision
            **sha1_git** identifier

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        :>json object author: information about the author of the revision
        :>json string author_url: link to
            :http:get:`/api/1/person/(person_id)/` to get information about
            the author of the revision
        :>json object committer: information about the committer of the
            revision
        :>json string committer_url: link to
            :http:get:`/api/1/person/(person_id)/` to get information about
            the committer of the revision
        :>json string committer_date: ISO representation of the commit date
            (in UTC)
        :>json string date: ISO representation of the revision date (in UTC)
        :>json string directory: the unique identifier that revision points
            to
        :>json string directory_url: link to
            :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get
            information about the directory associated to the revision
        :>json string id: the revision unique identifier
        :>json boolean merge: whether or not the revision corresponds to a
            merge commit
        :>json string message: the message associated to the revision
        :>json array parents: the parents of the revision, i.e. the previous
            revisions that head directly to it, each entry of that array
            contains an unique parent revision identifier but also a link to
            :http:get:`/api/1/revision/(sha1_git)/` to get more information
            about it
        :>json string type: the type of the revision

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 400: an invalid **sha1_git** value has been provided
        :statuscode 404: requested revision can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/`
    """ # noqa
    return api_lookup(
        service.lookup_revision, sha1_git,
        notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git),
        enrich_fn=utils.enrich_revision)
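A usage sketch (assumed base URL; the sha1_git comes from the docstring example above). It also assumes the enriched response carries absolute URLs such as directory_url:

import requests

def get_revision(sha1_git):
    r = requests.get('https://archive.softwareheritage.org/api/1'
                     '/revision/%s/' % sha1_git)
    r.raise_for_status()
    return r.json()

rev = get_revision('aafb16d69fd30ff58afdd69036a26047f3aebdc6')
root = requests.get(rev['directory_url']).json()  # revision's root directory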
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/raw/',
           'api-revision-raw-message')
@api_doc('/revision/raw/', tags=['hidden'], handle_response=True)
def api_revision_raw_message(request, sha1_git):
    """Return the raw data of the message of revision identified by sha1_git
    """
    raw = service.lookup_revision_message(sha1_git)

    response = HttpResponse(raw['message'],
                            content_type='application/octet-stream')
    response['Content-disposition'] = \
        'attachment;filename=rev_%s_raw' % sha1_git
    return response


@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/directory/',
           'api-revision-directory')
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/directory/(?P<dir_path>.+)/',
           'api-revision-directory')
@api_doc('/revision/directory/')
def api_revision_directory(request, sha1_git, dir_path=None, with_data=False):
    """
    .. http:get:: /api/1/revision/(sha1_git)/directory/[(path)/]

        Get information about directory (entry) objects associated to
        revisions. Each revision is associated to a single "root" directory.
        This endpoint behaves like
        :http:get:`/api/1/directory/(sha1_git)/[(path)/]`,
        but operates on the root directory associated to a given revision.

        :param string sha1_git: hexadecimal representation of the revision
            **sha1_git** identifier
        :param string path: optional parameter to get information about the
            directory entry pointed by that relative path

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request

        :>json array content: directory entries as returned by
            :http:get:`/api/1/directory/(sha1_git)/[(path)/]`
        :>json string path: path of directory from the revision root one
        :>json string revision: the unique revision identifier
        :>json string type: the type of the directory

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 400: an invalid **sha1_git** value has been provided
        :statuscode 404: requested revision can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`revision/f1b94134a4b879bc55c3dacdb496690c8ebdc03f/directory/`
    """ # noqa
    return _revision_directory_by({'sha1_git': sha1_git},
                                  dir_path, request.path,
                                  with_data=with_data)
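A sketch (assumed base URL) for saving the raw commit message served by the /raw/ endpoint above; the filename mirrors the Content-disposition set in the view:

import requests

def save_raw_message(sha1_git):
    r = requests.get('https://archive.softwareheritage.org/api/1'
                     '/revision/%s/raw/' % sha1_git)
    r.raise_for_status()
    filename = 'rev_%s_raw' % sha1_git
    with open(filename, 'wb') as f:
        f.write(r.content)
    return filename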
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/log/', 'api-revision-log')
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)'
           r'/prev/(?P<prev_sha1s>[0-9a-f/]+)/log/',
           'api-revision-log')
@api_doc('/revision/log/')
def api_revision_log(request, sha1_git, prev_sha1s=None):
    """
    .. http:get:: /api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/

        Get a list of all revisions heading to a given one, in other words
        show the commit log.

        :param string sha1_git: hexadecimal representation of the revision
            **sha1_git** identifier
        :param string prev_sha1s: optional parameter representing the
            navigation breadcrumbs (descendant revisions previously
            visited). If multiple values, use / as delimiter. If provided,
            revisions information will be added at the beginning of the
            returned list.

        :query int per_page: number of elements in the returned list, for
            pagination purpose

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept`
            header of request
        :resheader Link: indicates that a subsequent result page is
            available and contains the url pointing to it

        :>jsonarr object author: information about the author of the revision
        :>jsonarr string author_url: link to
            :http:get:`/api/1/person/(person_id)/` to get information about
            the author of the revision
        :>jsonarr object committer: information about the committer of the
            revision
        :>jsonarr string committer_url: link to
            :http:get:`/api/1/person/(person_id)/` to get information about
            the committer of the revision
        :>jsonarr string committer_date: ISO representation of the commit
            date (in UTC)
        :>jsonarr string date: ISO representation of the revision date
            (in UTC)
        :>jsonarr string directory: the unique identifier that revision
            points to
        :>jsonarr string directory_url: link to
            :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get
            information about the directory associated to the revision
        :>jsonarr string id: the revision unique identifier
        :>jsonarr boolean merge: whether or not the revision corresponds to
            a merge commit
        :>jsonarr string message: the message associated to the revision
        :>jsonarr array parents: the parents of the revision, i.e.
the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>jsonarr string type: the type of the revision **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/e1a315fa3fa734e2a6154ed7b5b9ae0eb8987aad/log/` """ # noqa result = {} per_page = int(request.query_params.get('per_page', '10')) def lookup_revision_log_with_limit(s, limit=per_page+1): return service.lookup_revision_log(s, limit) error_msg = 'Revision with sha1_git %s not found.' % sha1_git rev_get = api_lookup(lookup_revision_log_with_limit, sha1_git, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) nb_rev = len(rev_get) if nb_rev == per_page+1: rev_backward = rev_get[:-1] new_last_sha1 = rev_get[-1]['id'] query_params = {} if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-revision-log', url_args={'sha1_git': new_last_sha1}, query_params=query_params) } else: rev_backward = rev_get if not prev_sha1s: # no nav breadcrumbs, so we're done revisions = rev_backward else: rev_forward_ids = prev_sha1s.split('/') rev_forward = api_lookup( service.lookup_revision_multiple, rev_forward_ids, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) revisions = rev_forward + rev_backward result.update({ 'results': revisions }) return result diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py index 6a2299f0..2a458e52 100644 --- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -1,1210 +1,1121 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 from collections import defaultdict import magic -import math import pypandoc import stat import textwrap from django.core.cache import cache from django.utils.safestring import mark_safe from importlib import reload from swh.model.identifiers import persistent_identifier from swh.web.common import highlightjs, service from swh.web.common.exc import NotFoundExc, http_status_code_message +from swh.web.common.origin_visits import get_origin_visit from swh.web.common.utils import ( - reverse, format_utc_iso_date, parse_timestamp, - get_origin_visits, get_swh_persistent_id, + reverse, format_utc_iso_date, get_swh_persistent_id, swh_object_icons ) from swh.web.config import get_config def get_directory_entries(sha1_git): """Function that retrieves the content of a directory from the archive. The directories entries are first sorted in lexicographical order. Sub-directories and regular files are then extracted. 
Args: sha1_git: sha1_git identifier of the directory Returns: A tuple whose first member corresponds to the sub-directories list and second member the regular files list Raises: NotFoundExc if the directory is not found """ cache_entry_id = 'directory_entries_%s' % sha1_git cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry entries = list(service.lookup_directory(sha1_git)) for e in entries: e['perms'] = stat.filemode(e['perms']) if e['type'] == 'rev': # modify dir entry name to explicitly show it points # to a revision e['name'] = '%s @ %s' % (e['name'], e['target'][:7]) dirs = [e for e in entries if e['type'] in ('dir', 'rev')] files = [e for e in entries if e['type'] == 'file'] dirs = sorted(dirs, key=lambda d: d['name']) files = sorted(files, key=lambda f: f['name']) cache.set(cache_entry_id, (dirs, files)) return dirs, files def get_mimetype_and_encoding_for_content(content): """Function that returns the mime type and the encoding associated to a content buffer using the magic module under the hood. Args: content (bytes): a content buffer Returns: A tuple (mimetype, encoding), for instance ('text/plain', 'us-ascii'), associated to the provided content. """ while True: try: magic_result = magic.detect_from_content(content) mime_type = magic_result.mime_type encoding = magic_result.encoding break except Exception: # workaround an issue with the magic module who can fail # if detect_from_content is called multiple times in # a short amount of time reload(magic) return mime_type, encoding # maximum authorized content size in bytes for HTML display # with code highlighting content_display_max_size = get_config()['content_display_max_size'] snapshot_content_max_size = get_config()['snapshot_content_max_size'] def request_content(query_string, max_size=content_display_max_size, raise_if_unavailable=True, reencode=True): """Function that retrieves a content from the archive. Raw bytes content is first retrieved, then the content mime type. If the mime type is not stored in the archive, it will be computed using Python magic module. 
Args: query_string: a string of the form "[ALGO_HASH:]HASH" where optional ALGO_HASH can be either ``sha1``, ``sha1_git``, ``sha256``, or ``blake2s256`` (default to ``sha1``) and HASH the hexadecimal representation of the hash value max_size: the maximum size for a content to retrieve (default to 1MB, no size limit if None) Returns: A tuple whose first member corresponds to the content raw bytes and second member the content mime type Raises: NotFoundExc if the content is not found """ content_data = service.lookup_content(query_string) filetype = None language = None license = None # requests to the indexer db may fail so properly handle # those cases in order to avoid content display errors try: filetype = service.lookup_content_filetype(query_string) language = service.lookup_content_language(query_string) license = service.lookup_content_license(query_string) except Exception: pass mimetype = 'unknown' encoding = 'unknown' if filetype: mimetype = filetype['mimetype'] encoding = filetype['encoding'] # workaround when encountering corrupted data due to implicit # conversion from bytea to text in the indexer db (see T818) # TODO: Remove that code when all data have been correctly converted if mimetype.startswith('\\'): filetype = None content_data['error_code'] = 200 content_data['error_message'] = '' content_data['error_description'] = '' if not max_size or content_data['length'] < max_size: try: content_raw = service.lookup_content_raw(query_string) except Exception as e: if raise_if_unavailable: raise e else: content_data['raw_data'] = None content_data['error_code'] = 404 content_data['error_description'] = \ 'The bytes of the content are currently not available in the archive.' # noqa content_data['error_message'] = \ http_status_code_message[content_data['error_code']] else: content_data['raw_data'] = content_raw['data'] if not filetype: mimetype, encoding = \ get_mimetype_and_encoding_for_content(content_data['raw_data']) # noqa # encode textual content to utf-8 if needed if reencode and mimetype.startswith('text/'): # probably a malformed UTF-8 content, re-encode it # by replacing invalid chars with a substitution one if encoding == 'unknown-8bit': content_data['raw_data'] = \ content_data['raw_data'].decode('utf-8', 'replace')\ .encode('utf-8') elif 'ascii' not in encoding and encoding not in ['utf-8', 'binary']: # noqa content_data['raw_data'] = \ content_data['raw_data'].decode(encoding, 'replace')\ .encode('utf-8') elif reencode and mimetype.startswith('application/octet-stream'): # file may detect a text content as binary # so try to decode it for display encodings = ['us-ascii'] encodings += ['iso-8859-%s' % i for i in range(1, 17)] for encoding in encodings: try: content_data['raw_data'] = \ content_data['raw_data'].decode(encoding)\ .encode('utf-8') except Exception: pass else: # ensure display in content view mimetype = 'text/plain' break else: content_data['raw_data'] = None content_data['mimetype'] = mimetype content_data['encoding'] = encoding if language: content_data['language'] = language['lang'] else: content_data['language'] = 'not detected' if license: content_data['licenses'] = ', '.join(license['facts'][0]['licenses']) else: content_data['licenses'] = 'not detected' return content_data _browsers_supported_image_mimes = set(['image/gif', 'image/png', 'image/jpeg', 'image/bmp', 'image/webp', 'image/svg', 'image/svg+xml']) def prepare_content_for_display(content_data, mime_type, path): """Function that prepares a content for HTML display. 
The function tries to associate a programming language to a content in order to perform syntax highlighting client-side using highlightjs. The language is determined using either the content filename or its mime type. If the mime type corresponds to an image format supported by web browsers, the content will be encoded in base64 for displaying the image. Args: content_data (bytes): raw bytes of the content mime_type (string): mime type of the content path (string): path of the content including filename Returns: A dict containing the content bytes (possibly different from the one provided as parameter if it is an image) under the key 'content_data and the corresponding highlightjs language class under the key 'language'. """ language = highlightjs.get_hljs_language_from_filename(path) if not language: language = highlightjs.get_hljs_language_from_mime_type(mime_type) if not language: language = 'nohighlight' elif mime_type.startswith('application/'): mime_type = mime_type.replace('application/', 'text/') if mime_type.startswith('image/'): if mime_type in _browsers_supported_image_mimes: content_data = base64.b64encode(content_data) else: content_data = None if mime_type.startswith('image/svg'): mime_type = 'image/svg+xml' return {'content_data': content_data, 'language': language, 'mimetype': mime_type} -def get_origin_visit(origin_info, visit_ts=None, visit_id=None, - snapshot_id=None): - """Function that returns information about a visit for - a given origin. - The visit is retrieved from a provided timestamp. - The closest visit from that timestamp is selected. - - Args: - origin_info (dict): a dict filled with origin information - (id, url, type) - visit_ts (int or str): an ISO date string or Unix timestamp to parse - - Returns: - A dict containing the visit info as described below:: - - {'origin': 2, - 'date': '2017-10-08T11:54:25.582463+00:00', - 'metadata': {}, - 'visit': 25, - 'status': 'full'} - - """ - visits = get_origin_visits(origin_info) - - if not visits: - raise NotFoundExc('No visit associated to origin with' - ' type %s and url %s!' % (origin_info['type'], - origin_info['url'])) - - if snapshot_id: - visit = [v for v in visits if v['snapshot'] == snapshot_id] - if len(visit) == 0: - raise NotFoundExc( - 'Visit for snapshot with id %s for origin with type %s' - ' and url %s not found!' % (snapshot_id, origin_info['type'], - origin_info['url'])) - return visit[0] - - if visit_id: - visit = [v for v in visits if v['visit'] == int(visit_id)] - if len(visit) == 0: - raise NotFoundExc( - 'Visit with id %s for origin with type %s' - ' and url %s not found!' 
% (visit_id, origin_info['type'], - origin_info['url'])) - return visit[0] - - if not visit_ts: - # returns the latest full visit when no timestamp is provided - for v in reversed(visits): - if v['status'] == 'full': - return v - return visits[-1] - - parsed_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) - - visit_idx = None - for i, visit in enumerate(visits): - ts = math.floor(parse_timestamp(visit['date']).timestamp()) - if i == 0 and parsed_visit_ts <= ts: - return visit - elif i == len(visits) - 1: - if parsed_visit_ts >= ts: - return visit - else: - next_ts = math.floor( - parse_timestamp(visits[i+1]['date']).timestamp()) - if parsed_visit_ts >= ts and parsed_visit_ts < next_ts: - if (parsed_visit_ts - ts) < (next_ts - parsed_visit_ts): - visit_idx = i - break - else: - visit_idx = i+1 - break - - if visit_idx is not None: - visit = visits[visit_idx] - while visit_idx < len(visits) - 1 and \ - visit['date'] == visits[visit_idx+1]['date']: - visit_idx = visit_idx + 1 - visit = visits[visit_idx] - return visit - else: - raise NotFoundExc( - 'Visit with timestamp %s for origin with type %s and url %s not found!' % # noqa - (visit_ts, origin_info['type'], origin_info['url'])) - - def process_snapshot_branches(snapshot): """ Process a dictionary describing snapshot branches: extract those targeting revisions and releases, put them in two different lists, then sort those lists in lexicographical order of the branches' names. Args: snapshot_branches (dict): A dict describing the branches of a snapshot as returned for instance by :func:`swh.web.common.service.lookup_snapshot` Returns: tuple: A tuple whose first member is the sorted list of branches targeting revisions and second member the sorted list of branches targeting releases """ # noqa snapshot_branches = snapshot['branches'] branches = {} branch_aliases = {} releases = {} revision_to_branch = defaultdict(set) revision_to_release = defaultdict(set) release_to_branch = defaultdict(set) for branch_name, target in snapshot_branches.items(): if not target: # FIXME: display branches with an unknown target anyway continue target_id = target['target'] target_type = target['target_type'] if target_type == 'revision': branches[branch_name] = { 'name': branch_name, 'revision': target_id, } revision_to_branch[target_id].add(branch_name) elif target_type == 'release': release_to_branch[target_id].add(branch_name) elif target_type == 'alias': branch_aliases[branch_name] = target_id # FIXME: handle pointers to other object types def _enrich_release_branch(branch, release): releases[branch] = { 'name': release['name'], 'branch_name': branch, 'date': format_utc_iso_date(release['date']), 'id': release['id'], 'message': release['message'], 'target_type': release['target_type'], 'target': release['target'], } def _enrich_revision_branch(branch, revision): branches[branch].update({ 'revision': revision['id'], 'directory': revision['directory'], 'date': format_utc_iso_date(revision['date']), 'message': revision['message'] }) releases_info = service.lookup_release_multiple( release_to_branch.keys() ) for release in releases_info: branches_to_update = release_to_branch[release['id']] for branch in branches_to_update: _enrich_release_branch(branch, release) if release['target_type'] == 'revision': revision_to_release[release['target']].update( branches_to_update ) revisions = service.lookup_revision_multiple( set(revision_to_branch.keys()) | set(revision_to_release.keys()) ) for revision in revisions: if not revision: continue for branch in 
revision_to_branch[revision['id']]: _enrich_revision_branch(branch, revision) for release in revision_to_release[revision['id']]: releases[release]['directory'] = revision['directory'] for branch_alias, branch_target in branch_aliases.items(): if branch_target in branches: branches[branch_alias] = dict(branches[branch_target]) else: snp = service.lookup_snapshot(snapshot['id'], branches_from=branch_target, branches_count=1) if snp and branch_target in snp['branches']: target_type = snp['branches'][branch_target]['target_type'] target = snp['branches'][branch_target]['target'] if target_type == 'revision': branches[branch_alias] = snp['branches'][branch_target] revision = service.lookup_revision(target) _enrich_revision_branch(branch_alias, revision) elif target_type == 'release': release = service.lookup_release(target) _enrich_release_branch(branch_alias, release) if branch_alias in branches: branches[branch_alias]['name'] = branch_alias ret_branches = list(sorted(branches.values(), key=lambda b: b['name'])) ret_releases = list(sorted(releases.values(), key=lambda b: b['name'])) return ret_branches, ret_releases def get_snapshot_content(snapshot_id): """Returns the lists of branches and releases associated to a swh snapshot. That list is put in cache in order to speedup the navigation in the swh-web/browse ui. .. warning:: At most 1000 branches contained in the snapshot will be returned for performance reasons. Args: snapshot_id (str): hexadecimal representation of the snapshot identifier Returns: A tuple with two members. The first one is a list of dict describing the snapshot branches. The second one is a list of dict describing the snapshot releases. Raises: NotFoundExc if the snapshot does not exist """ cache_entry_id = 'swh_snapshot_%s' % snapshot_id cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry['branches'], cache_entry['releases'] branches = [] releases = [] if snapshot_id: snapshot = service.lookup_snapshot( snapshot_id, branches_count=snapshot_content_max_size) branches, releases = process_snapshot_branches(snapshot) cache.set(cache_entry_id, { 'branches': branches, 'releases': releases, }) return branches, releases def get_origin_visit_snapshot(origin_info, visit_ts=None, visit_id=None, snapshot_id=None): """Returns the lists of branches and releases associated to a swh origin for a given visit. The visit is expressed by a timestamp. In the latter case, the closest visit from the provided timestamp will be used. If no visit parameter is provided, it returns the list of branches found for the latest visit. That list is put in cache in order to speedup the navigation in the swh-web/browse ui. .. warning:: At most 1000 branches contained in the snapshot will be returned for performance reasons. Args: origin_info (dict): a dict filled with origin information (id, url, type) visit_ts (int or str): an ISO date string or Unix timestamp to parse visit_id (int): optional visit id for disambiguation in case several visits have the same timestamp Returns: A tuple with two members. The first one is a list of dict describing the origin branches for the given visit. The second one is a list of dict describing the origin releases for the given visit. 
    Raises:
        NotFoundExc if the origin or its visit are not found
    """

    visit_info = get_origin_visit(origin_info, visit_ts, visit_id,
                                  snapshot_id)

    return get_snapshot_content(visit_info['snapshot'])


def gen_link(url, link_text=None, link_attrs={}):
    """
    Utility function for generating an HTML link to insert
    in Django templates.

    Args:
        url (str): an url
        link_text (str): optional text for the produced link, if not
            provided the url will be used
        link_attrs (dict): optional attributes (e.g. class)
            to add to the link

    Returns:
        An HTML link in the form '<a href="url">link_text</a>'

    """
    attrs = ' '
    for k, v in link_attrs.items():
        attrs += '%s="%s" ' % (k, v)
    if not link_text:
        link_text = url
    link = '<a%shref="%s">%s</a>' % (attrs, url, link_text)
    return mark_safe(link)
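A quick rendering check of the helper above (illustrative only; the expected string follows directly from the format string):

from swh.web.browse.utils import gen_link

html = gen_link('https://example.org', 'example',
                link_attrs={'class': 'external'})
# html == '<a class="external" href="https://example.org">example</a>'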
def gen_person_link(person_id, person_name, snapshot_context=None,
                    link_attrs={}):
    """
    Utility function for generating a link to a person HTML view
    to insert in Django templates.

    Args:
        person_id (int): a person id
        person_name (str): the associated person name
        link_attrs (dict): optional attributes (e.g. class)
            to add to the link

    Returns:
        An HTML link in the form '<a href="person_view_url">person_name</a>'

    """
    query_params = None
    if snapshot_context and snapshot_context['origin_info']:
        origin_info = snapshot_context['origin_info']
        query_params = {'origin_type': origin_info['type'],
                        'origin': origin_info['url']}
        if 'timestamp' in snapshot_context['url_args']:
            query_params['timestamp'] = \
                snapshot_context['url_args']['timestamp']
        if 'visit_id' in snapshot_context['query_params']:
            query_params['visit_id'] = \
                snapshot_context['query_params']['visit_id']
    elif snapshot_context:
        query_params = {'snapshot_id': snapshot_context['snapshot_id']}
    person_url = reverse('browse-person',
                         url_args={'person_id': person_id},
                         query_params=query_params)
    return gen_link(person_url, person_name or 'None', link_attrs)


def gen_revision_url(revision_id, snapshot_context=None):
    """
    Utility function for generating an url to a revision.

    Args:
        revision_id (str): a revision id
        snapshot_context (dict): if provided, generate snapshot-dependent
            browsing url

    Returns:
        str: The url to browse the revision

    """
    query_params = None
    if snapshot_context and snapshot_context['origin_info']:
        origin_info = snapshot_context['origin_info']
        origin_type = snapshot_context['origin_type']
        query_params = {'origin_type': origin_type,
                        'origin': origin_info['url']}
        if 'timestamp' in snapshot_context['url_args']:
            query_params['timestamp'] = \
                snapshot_context['url_args']['timestamp']
        if 'visit_id' in snapshot_context['query_params']:
            query_params['visit_id'] = \
                snapshot_context['query_params']['visit_id']
    elif snapshot_context:
        query_params = {'snapshot_id': snapshot_context['snapshot_id']}

    return reverse('browse-revision',
                   url_args={'sha1_git': revision_id},
                   query_params=query_params)


def gen_revision_link(revision_id, shorten_id=False, snapshot_context=None,
                      link_text=None, link_attrs={}):
    """
    Utility function for generating a link to a revision HTML view
    to insert in Django templates.

    Args:
        revision_id (str): a revision id
        shorten_id (boolean): whether to shorten the revision id to 7
            characters for the link text
        snapshot_context (dict): if provided, generate snapshot-dependent
            browsing link
        link_text (str): optional text to use for the generated link
        link_attrs (dict): optional attributes (e.g. class)
            to add to the link

    Returns:
        str: An HTML link in the form
        '<a href="revision_view_url">revision_id</a>'

    """
    if not revision_id:
        return None
    revision_url = gen_revision_url(revision_id, snapshot_context)
    if shorten_id:
        return gen_link(revision_url, revision_id[:7], link_attrs)
    else:
        if not link_text:
            link_text = revision_id
        return gen_link(revision_url, link_text, link_attrs)


def gen_origin_link(origin_info, link_attrs={}):
    """
    Utility function for generating a link to a software origin HTML view
    to insert in Django templates.

    Args:
        origin_info (dict): a dict filled with origin information
            (id, type, url)
        link_attrs (dict): optional attributes (e.g. class)
            to add to the link

    Returns:
        An HTML link in the form
        '<a href="origin_view_url">Origin: origin_url</a>'

    """ # noqa
    origin_browse_url = reverse('browse-origin',
                                url_args={'origin_type': origin_info['type'],
                                          'origin_url': origin_info['url']})
    return gen_link(origin_browse_url,
                    'Origin: ' + origin_info['url'], link_attrs)


def gen_directory_link(sha1_git, link_text=None, link_attrs={}):
    """
    Utility function for generating a link to a directory HTML view
    to insert in Django templates.

    Args:
        sha1_git (str): directory identifier
        link_text (str): optional text for the generated link
            (the generated url will be used by default)
        link_attrs (dict): optional attributes (e.g. class)
            to add to the link

    Returns:
        An HTML link in the form '<a href="directory_view_url">link_text</a>'

    """
    if not sha1_git:
        return None
    directory_url = reverse('browse-directory',
                            url_args={'sha1_git': sha1_git})
    if not link_text:
        link_text = directory_url
    return gen_link(directory_url, link_text, link_attrs)


def gen_snapshot_link(snapshot_id, link_text=None, link_attrs={}):
    """
    Utility function for generating a link to a snapshot HTML view
    to insert in Django templates.

    Args:
        snapshot_id (str): snapshot identifier
        link_text (str): optional text for the generated link
            (the generated url will be used by default)
        link_attrs (dict): optional attributes (e.g. class)
            to add to the link

    Returns:
        An HTML link in the form '<a href="snapshot_view_url">link_text</a>'

    """
    snapshot_url = reverse('browse-snapshot',
                           url_args={'snapshot_id': snapshot_id})
    if not link_text:
        link_text = snapshot_url
    return gen_link(snapshot_url, link_text, link_attrs)
def gen_snapshot_directory_link(snapshot_context, revision_id=None,
                                link_text=None, link_attrs={}):
    """
    Utility function for generating a link to a directory HTML view
    in the context of a snapshot to insert in Django templates.

    Args:
        snapshot_context (dict): the snapshot information
        revision_id (str): optional revision identifier in order
            to use the associated directory
        link_text (str): optional text to use for the generated link
        link_attrs (dict): optional attributes (e.g. class)
            to add to the link

    Returns:
        An HTML link in the form
        '<a href="origin_directory_view_url">origin_directory_view_url</a>'

    """
    query_params = {'revision': revision_id}
    if snapshot_context['origin_info']:
        origin_info = snapshot_context['origin_info']
        url_args = {'origin_url': origin_info['url']}
        if 'timestamp' in snapshot_context['url_args']:
            url_args['timestamp'] = \
                snapshot_context['url_args']['timestamp']
        if 'visit_id' in snapshot_context['query_params']:
            query_params['visit_id'] = \
                snapshot_context['query_params']['visit_id']
        directory_url = reverse('browse-origin-directory',
                                url_args=url_args,
                                query_params=query_params)
    else:
        url_args = {'snapshot_id': snapshot_context['snapshot_id']}
        directory_url = reverse('browse-snapshot-directory',
                                url_args=url_args,
                                query_params=query_params)

    if not link_text:
        link_text = directory_url
    return gen_link(directory_url, link_text, link_attrs)


def gen_content_link(sha1_git, link_text=None, link_attrs={}):
    """
    Utility function for generating a link to a content HTML view
    to insert in Django templates.

    Args:
        sha1_git (str): content identifier
        link_text (str): optional text for the generated link
            (the generated url will be used by default)
        link_attrs (dict): optional attributes (e.g. class)
            to add to the link

    Returns:
        An HTML link in the form '<a href="content_view_url">link_text</a>'

    """
    if not sha1_git:
        return None
    content_url = reverse('browse-content',
                          url_args={'query_string': 'sha1_git:' + sha1_git})
    if not link_text:
        link_text = content_url
    return gen_link(content_url, link_text, link_attrs)


def get_revision_log_url(revision_id, snapshot_context=None):
    """
    Utility function for getting the URL for a revision log HTML view
    (possibly in the context of an origin).

    Args:
        revision_id (str): revision identifier the history heads to
        snapshot_context (dict): if provided, generate snapshot-dependent
            browsing link

    Returns:
        The revision log view URL
    """
    query_params = {'revision': revision_id}
    if snapshot_context and snapshot_context['origin_info']:
        origin_info = snapshot_context['origin_info']
        url_args = {'origin_url': origin_info['url']}
        if 'timestamp' in snapshot_context['url_args']:
            url_args['timestamp'] = \
                snapshot_context['url_args']['timestamp']
        if 'visit_id' in snapshot_context['query_params']:
            query_params['visit_id'] = \
                snapshot_context['query_params']['visit_id']
        revision_log_url = reverse('browse-origin-log',
                                   url_args=url_args,
                                   query_params=query_params)
    elif snapshot_context:
        url_args = {'snapshot_id': snapshot_context['snapshot_id']}
        revision_log_url = reverse('browse-snapshot-log',
                                   url_args=url_args,
                                   query_params=query_params)
    else:
        revision_log_url = reverse('browse-revision-log',
                                   url_args={'sha1_git': revision_id})
    return revision_log_url
class) to add to the link Returns: An HTML link in the form 'link_text' """ if not revision_id: return None revision_log_url = get_revision_log_url(revision_id, snapshot_context) if not link_text: link_text = revision_log_url return gen_link(revision_log_url, link_text, link_attrs) def format_log_entries(revision_log, per_page, snapshot_context=None): """ Utility function that processes raw revision log data for HTML display. Its purpose is to: * add links to relevant browse views * format dates in a human readable format * truncate the message log Args: revision_log (list): raw revision log as returned by the swh-web api per_page (int): number of log entries per page snapshot_context (dict): if provided, generate snapshot-dependent browsing link """ revision_log_data = [] for i, rev in enumerate(revision_log): if i == per_page: break author_name = 'None' author_fullname = 'None' committer_fullname = 'None' if rev['author']: author_name = rev['author']['name'] or rev['author']['fullname'] author_fullname = rev['author']['fullname'] if rev['committer']: committer_fullname = rev['committer']['fullname'] author_date = format_utc_iso_date(rev['date']) committer_date = format_utc_iso_date(rev['committer_date']) tooltip = 'revision %s\n' % rev['id'] tooltip += 'author: %s\n' % author_fullname tooltip += 'author date: %s\n' % author_date tooltip += 'committer: %s\n' % committer_fullname tooltip += 'committer date: %s\n\n' % committer_date if rev['message']: tooltip += textwrap.indent(rev['message'], ' '*4) revision_log_data.append({ 'author': author_name, 'id': rev['id'][:7], 'message': rev['message'], 'date': author_date, 'commit_date': committer_date, 'url': gen_revision_url(rev['id'], snapshot_context), 'tooltip': tooltip }) return revision_log_data # list of origin types that can be found in the swh archive # TODO: retrieve it dynamically in an efficient way instead # of hardcoding it _swh_origin_types = ['git', 'svn', 'deb', 'hg', 'ftp', 'deposit', 'pypi'] def get_origin_info(origin_url, origin_type=None): """ Get info about a software origin. Its main purpose is to automatically find an origin type when it is not provided as a parameter. Args: origin_url (str): complete url of a software origin origin_type (str): optional origin type Returns: A dict with the following entries: * type: the origin type * url: the origin url * id: the internal id of the origin """ if origin_type: return service.lookup_origin({'type': origin_type, 'url': origin_url}) else: for origin_type in _swh_origin_types: try: origin_info = service.lookup_origin({'type': origin_type, 'url': origin_url}) return origin_info except Exception: pass raise NotFoundExc('Origin with url %s not found!' % origin_url) def get_snapshot_context(snapshot_id=None, origin_type=None, origin_url=None, timestamp=None, visit_id=None): """ Utility function to compute relevant information when navigating the archive in a snapshot context. The snapshot is either referenced by its id or it will be retrieved from an origin visit. Args: snapshot_id (str): hexadecimal representation of a snapshot identifier, all other parameters will be ignored if it is provided origin_type (str): the origin type (git, svn, deposit, ...) origin_url (str): the origin_url (e.g.
https://github.com/(user)/(repo)/) timestamp (str): a datetime string for retrieving the closest visit of the origin visit_id (int): optional visit id for disambiguation in case of several visits with the same timestamp Returns: A dict with the following entries: * origin_info: dict containing origin information * visit_info: dict containing visit information * branches: the list of branches for the origin found during the visit * releases: the list of releases for the origin found during the visit * origin_browse_url: the url to browse the origin * origin_branches_url: the url to browse the origin branches * origin_releases_url: the url to browse the origin releases * origin_visit_url: the url to browse the snapshot of the origin found during the visit * url_args: dict containing url arguments to use when browsing in the context of the origin and its visit Raises: NotFoundExc: if no snapshot is found for the visit of an origin. """ # noqa origin_info = None visit_info = None url_args = None query_params = {} branches = [] releases = [] browse_url = None visit_url = None branches_url = None releases_url = None swh_type = 'snapshot' if origin_url: swh_type = 'origin' origin_info = get_origin_info(origin_url, origin_type) visit_info = get_origin_visit(origin_info, timestamp, visit_id, snapshot_id) fmt_date = format_utc_iso_date(visit_info['date']) visit_info['fmt_date'] = fmt_date snapshot_id = visit_info['snapshot'] if not snapshot_id: raise NotFoundExc('No snapshot associated to the visit of origin ' '%s on %s' % (origin_url, fmt_date)) # the provided timestamp is not necessarily equal to the one # of the retrieved visit, so get the exact one in order # to use it in the urls generated below if timestamp: timestamp = visit_info['date'] branches, releases = \ get_origin_visit_snapshot(origin_info, timestamp, visit_id, snapshot_id) url_args = {'origin_type': origin_type, 'origin_url': origin_info['url']} query_params = {'visit_id': visit_id} browse_url = reverse('browse-origin-visits', url_args=url_args) if timestamp: url_args['timestamp'] = format_utc_iso_date(timestamp, '%Y-%m-%dT%H:%M:%S') visit_url = reverse('browse-origin-directory', url_args=url_args, query_params=query_params) visit_info['url'] = visit_url branches_url = reverse('browse-origin-branches', url_args=url_args, query_params=query_params) releases_url = reverse('browse-origin-releases', url_args=url_args, query_params=query_params) elif snapshot_id: branches, releases = get_snapshot_content(snapshot_id) url_args = {'snapshot_id': snapshot_id} browse_url = reverse('browse-snapshot', url_args=url_args) branches_url = reverse('browse-snapshot-branches', url_args=url_args) releases_url = reverse('browse-snapshot-releases', url_args=url_args) releases = list(reversed(releases)) snapshot_size = service.lookup_snapshot_size(snapshot_id) is_empty = sum(snapshot_size.values()) == 0 swh_snp_id = persistent_identifier('snapshot', snapshot_id) return { 'swh_type': swh_type, 'swh_object_id': swh_snp_id, 'snapshot_id': snapshot_id, 'snapshot_size': snapshot_size, 'is_empty': is_empty, 'origin_info': origin_info, # keep track of whether the origin type was provided as a url argument 'origin_type': origin_type, 'visit_info': visit_info, 'branches': branches, 'releases': releases, 'branch': None, 'release': None, 'browse_url': browse_url, 'branches_url': branches_url, 'releases_url': releases_url, 'url_args': url_args, 'query_params': query_params } # list of common readme names ordered by preference # (lower indices have higher priority)
_common_readme_names = [ "readme.markdown", "readme.md", "readme.rst", "readme.txt", "readme" ] def get_readme_to_display(readmes): """ Process a list of readme files found in a directory in order to find the adequate one to display. Args: readmes: a list of dict where keys are readme file names and values are readme sha1s Returns: A tuple (readme_name, readme_sha1) """ readme_name = None readme_url = None readme_sha1 = None readme_html = None lc_readmes = {k.lower(): {'orig_name': k, 'sha1': v} for k, v in readmes.items()} # look for readme names according to the preference order # defined by the _common_readme_names list for common_readme_name in _common_readme_names: if common_readme_name in lc_readmes: readme_name = lc_readmes[common_readme_name]['orig_name'] readme_sha1 = lc_readmes[common_readme_name]['sha1'] readme_url = reverse('browse-content-raw', url_args={'query_string': readme_sha1}) break # otherwise pick the first readme like file if any if not readme_name and len(readmes.items()) > 0: readme_name = next(iter(readmes)) readme_sha1 = readmes[readme_name] readme_url = reverse('browse-content-raw', url_args={'query_string': readme_sha1}) # convert rst README to html server side as there is # no viable solution to perform that task client side if readme_name and readme_name.endswith('.rst'): cache_entry_id = 'readme_%s' % readme_sha1 cache_entry = cache.get(cache_entry_id) if cache_entry: readme_html = cache_entry else: try: rst_doc = request_content(readme_sha1) readme_html = pypandoc.convert_text(rst_doc['raw_data'], 'html', format='rst') cache.set(cache_entry_id, readme_html) except Exception: readme_html = 'Readme bytes are not available' return readme_name, readme_url, readme_html def get_swh_persistent_ids(swh_objects, snapshot_context=None): """ Returns a list of dict containing info related to persistent identifiers of swh objects. 
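Stepping back to get_readme_to_display above: its selection rule is worth seeing in isolation. A minimal sketch under the same assumptions (the preference list is the module's own; the fallback mirrors the "first readme-like file" branch)::

    _common_readme_names = ["readme.markdown", "readme.md",
                            "readme.rst", "readme.txt", "readme"]

    def pick_readme_name(readmes):
        # case-insensitive match against the preference list ...
        lc_names = {name.lower(): name for name in readmes}
        for candidate in _common_readme_names:
            if candidate in lc_names:
                return lc_names[candidate]
        # ... falling back to the first readme-like file, if any
        return next(iter(readmes), None)

    # pick_readme_name({'README.rst': '<sha1>', 'notes.txt': '<sha1>'})
    # -> 'README.rst'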
Args: swh_objects (list): a list of dict with the following keys: * type: swh object type (content/directory/release/revision/snapshot) * id: swh object id snapshot_context (dict): optional parameter describing the snapshot in which the object has been found Returns: list: a list of dict with the following keys: * object_type: the swh object type (content/directory/release/revision/snapshot) * object_icon: the swh object icon to use in HTML views * swh_id: the computed swh object persistent identifier * swh_id_url: the url resolving the persistent identifier * show_options: boolean indicating if the persistent id options must be displayed in persistent ids HTML view """ # noqa swh_ids = [] for swh_object in swh_objects: if not swh_object['id']: continue swh_id = get_swh_persistent_id(swh_object['type'], swh_object['id']) show_options = swh_object['type'] == 'content' or \ (snapshot_context and snapshot_context['origin_info'] is not None) object_icon = swh_object_icons[swh_object['type']] swh_ids.append({ 'object_type': swh_object['type'], 'object_icon': object_icon, 'swh_id': swh_id, 'swh_id_url': reverse('browse-swh-id', url_args={'swh_id': swh_id}), 'show_options': show_options }) return swh_ids diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py index 35fbe709..f82cd5b6 100644 --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -1,242 +1,242 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from distutils.util import strtobool from django.http import HttpResponse from django.shortcuts import render, redirect from swh.web.common import service +from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import ( - reverse, format_utc_iso_date, parse_timestamp, - get_origin_visits + reverse, format_utc_iso_date, parse_timestamp ) from swh.web.common.exc import handle_view_exception from swh.web.browse.utils import ( get_origin_info, get_snapshot_context ) from swh.web.browse.browseurls import browse_route from .utils.snapshot_context import ( browse_snapshot_directory, browse_snapshot_content, browse_snapshot_log, browse_snapshot_branches, browse_snapshot_releases ) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/directory/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/directory/(?P.+)/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/directory/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/directory/(?P.+)/', # noqa r'origin/(?P.+)/visit/(?P.+)/directory/', # noqa r'origin/(?P.+)/visit/(?P.+)/directory/(?P.+)/', # noqa r'origin/(?P.+)/directory/', # noqa r'origin/(?P.+)/directory/(?P.+)/', # noqa view_name='browse-origin-directory') def origin_directory_browse(request, origin_url, origin_type=None, timestamp=None, path=None): """Django view for browsing the content of a directory associated to an origin for a given visit. 
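Before moving on: the get_swh_persistent_ids helper that closed swh/web/browse/utils.py delegates identifier rendering to get_swh_persistent_id. For readers unfamiliar with the scheme, a simplified sketch of what such identifiers look like; the authoritative implementation lives in swh.model.identifiers, this is an illustration only::

    # Simplified rendering of a swh persistent identifier (sketch).
    _short_types = {'content': 'cnt', 'directory': 'dir', 'release': 'rel',
                    'revision': 'rev', 'snapshot': 'snp'}

    def sketch_persistent_identifier(object_type, object_id):
        # e.g. ('snapshot', 'c7c10808...') -> 'swh:1:snp:c7c10808...'
        return 'swh:1:%s:%s' % (_short_types[object_type], object_id)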
The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/directory/[(path)/]` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/directory/[(path)/]` """ # noqa return browse_snapshot_directory( request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp, path=path) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/content/(?P.+)/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/content/(?P.+)/', # noqa r'origin/(?P.+)/visit/(?P.+)/content/(?P.+)/', # noqa r'origin/(?P.+)/content/(?P.+)/', # noqa view_name='browse-origin-content') def origin_content_browse(request, origin_url, origin_type=None, path=None, timestamp=None): """Django view that produces an HTML display of a content associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/content/(path)/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/content/(path)/` """ # noqa return browse_snapshot_content(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp, path=path) PER_PAGE = 20 @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/log/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/log/', r'origin/(?P.+)/visit/(?P.+)/log/', # noqa r'origin/(?P.+)/log/', view_name='browse-origin-log') def origin_log_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of revisions history (aka the commit log) associated to a software origin. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/log/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/log/` """ # noqa return browse_snapshot_log(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/branches/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/branches/', # noqa r'origin/(?P.+)/visit/(?P.+)/branches/', # noqa r'origin/(?P.+)/branches/', # noqa view_name='browse-origin-branches') def origin_branches_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of the list of branches associated to an origin for a given visit. The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/branches/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/branches/` """ # noqa return browse_snapshot_branches(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visit/(?P.+)/releases/', # noqa r'origin/(?P[a-z]+)/url/(?P.+)/releases/', # noqa r'origin/(?P.+)/visit/(?P.+)/releases/', # noqa r'origin/(?P.+)/releases/', # noqa view_name='browse-origin-releases') def origin_releases_browse(request, origin_url, origin_type=None, timestamp=None): """Django view that produces an HTML display of the list of releases associated to an origin for a given visit. 
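A note on the routing style used throughout this file: browse_route registers several URL regexps against a single view. A hypothetical, Django-only approximation of that decorator (the real one lives in swh.web.browse.browseurls and also records the patterns for inclusion in the urlconf; names below are illustrative)::

    from django.conf.urls import url

    def browse_route_sketch(*url_patterns, view_name=None):
        # Attach one urlpattern per regexp, all resolving to the same view
        def decorator(view):
            view.urlpatterns = [
                url(r'^%s$' % pattern, view, name=view_name)
                for pattern in url_patterns
            ]
            return view
        return decorator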
The url scheme that points to it is the following: * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/releases/` * :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visit/(timestamp)/releases/` """ # noqa return browse_snapshot_releases(request, origin_type=origin_type, origin_url=origin_url, timestamp=timestamp) @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/visits/', r'origin/(?P.+)/visits/', view_name='browse-origin-visits') def origin_visits_browse(request, origin_url, origin_type=None): """Django view that produces an HTML display of visits reporting for a swh origin identified by its id or its url. The url that points to it is :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visits/`. """ # noqa try: origin_info = get_origin_info(origin_url, origin_type) origin_visits = get_origin_visits(origin_info) snapshot_context = get_snapshot_context(origin_type=origin_type, origin_url=origin_url) except Exception as exc: return handle_view_exception(request, exc) for i, visit in enumerate(origin_visits): url_date = format_utc_iso_date(visit['date'], '%Y-%m-%dT%H:%M:%SZ') visit['fmt_date'] = format_utc_iso_date(visit['date']) query_params = {} if i < len(origin_visits) - 1: if visit['date'] == origin_visits[i+1]['date']: query_params = {'visit_id': visit['visit']} if i > 0: if visit['date'] == origin_visits[i-1]['date']: query_params = {'visit_id': visit['visit']} snapshot = visit['snapshot'] if visit['snapshot'] else '' visit['browse_url'] = reverse('browse-origin-directory', url_args={'origin_type': origin_type, 'origin_url': origin_url, 'timestamp': url_date}, query_params=query_params) if not snapshot: visit['snapshot'] = '' visit['date'] = parse_timestamp(visit['date']).timestamp() heading = 'Origin visits - %s' % origin_url return render(request, 'browse/origin-visits.html', {'heading': heading, 'swh_object_name': 'Visits', 'swh_object_metadata': origin_info, 'origin_visits': origin_visits, 'origin_info': origin_info, 'snapshot_context': snapshot_context, 'vault_cooking': None, 'show_actions_menu': False}) @browse_route(r'origin/search/(?P.+)/', view_name='browse-origin-search') def _origin_search(request, url_pattern): """Internal browse endpoint to search for origins whose urls contain a provided string pattern or match a provided regular expression. The search is performed in a case insensitive way. """ offset = int(request.GET.get('offset', '0')) limit = int(request.GET.get('limit', '50')) regexp = request.GET.get('regexp', 'false') with_visit = request.GET.get('with_visit', 'false') url_pattern = url_pattern.replace('///', '\\') try: results = service.search_origin(url_pattern, offset, limit, bool(strtobool(regexp)), bool(strtobool(with_visit))) results = json.dumps(list(results), sort_keys=True, indent=4, separators=(',', ': ')) except Exception as exc: return handle_view_exception(request, exc, html_response=False) return HttpResponse(results, content_type='application/json') @browse_route(r'origin/(?P[0-9]+)/latest_snapshot/', view_name='browse-origin-latest-snapshot') def _origin_latest_snapshot(request, origin_id): """ Internal browse endpoint used to check if an origin has already been visited by Software Heritage and has at least one full visit. 
""" result = \ service.lookup_latest_origin_snapshot(origin_id, allowed_statuses=['full', 'partial']) result = json.dumps(result, sort_keys=True, indent=4, separators=(',', ': ')) return HttpResponse(result, content_type='application/json') @browse_route(r'origin/(?P[a-z]+)/url/(?P.+)/', r'origin/(?P.+)/', view_name='browse-origin') def origin_browse(request, origin_url, origin_type=None): """Django view that redirects to the display of the latest archived snapshot for a given software origin. """ # noqa last_snapshot_url = reverse('browse-origin-directory', url_args={'origin_type': origin_type, 'origin_url': origin_url}) return redirect(last_snapshot_url) diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py index 2682f40a..cf304680 100644 --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -1,355 +1,356 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from bisect import bisect_right from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ValidationError from django.core.validators import URLValidator from swh.web import config from swh.web.common import service from swh.web.common.exc import BadInputExc, ForbiddenExc from swh.web.common.models import ( SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEED, SAVE_TASK_FAILED ) -from swh.web.common.utils import get_origin_visits, parse_timestamp +from swh.web.common.origin_visits import get_origin_visits +from swh.web.common.utils import parse_timestamp from swh.scheduler.utils import create_oneshot_task_dict scheduler = config.scheduler() def get_origin_save_authorized_urls(): """ Get the list of origin url prefixes authorized to be immediately loaded into the archive (whitelist). Returns: list: The list of authorized origin url prefix """ return [origin.url for origin in SaveAuthorizedOrigin.objects.all()] def get_origin_save_unauthorized_urls(): """ Get the list of origin url prefixes forbidden to be loaded into the archive (blacklist). Returns: list: the list of unauthorized origin url prefix """ return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] def can_save_origin(origin_url): """ Check if a software origin can be saved into the archive. 
Based on the origin url, the save request will be either: * immediately accepted if the url is whitelisted * rejected if the url is blacklisted * put in pending state for manual review otherwise Args: origin_url (str): the software origin url to check Returns: str: the origin save request status, either **accepted**, **rejected** or **pending** """ # origin url may be blacklisted for url_prefix in get_origin_save_unauthorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_REJECTED # if the origin url is in the white list, it can be immediately saved for url_prefix in get_origin_save_authorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_ACCEPTED # otherwise, the origin url needs to be manually verified return SAVE_REQUEST_PENDING # map origin type to scheduler task # TODO: do not hardcode the task name here # TODO: unlock hg and svn loading once the scheduler # loading tasks are available in production _origin_type_task = { 'git': 'origin-update-git', # 'hg': 'origin-load-hg', # 'svn': 'origin-load-svn' } # map scheduler task status to origin save status _save_task_status = { 'next_run_not_scheduled': SAVE_TASK_NOT_YET_SCHEDULED, 'next_run_scheduled': SAVE_TASK_SCHEDULED, 'completed': SAVE_TASK_SUCCEED, 'disabled': SAVE_TASK_FAILED } def get_savable_origin_types(): """ Get the list of software origin types that can be loaded through a save request. Returns: list: the list of savable origin types """ return sorted(list(_origin_type_task.keys())) def _check_origin_type_savable(origin_type): """ Check that origins of the given type can be loaded through a save request. Raises: BadInputExc: if the origin type can not be saved """ allowed_origin_types = ', '.join(get_savable_origin_types()) if origin_type not in _origin_type_task: raise BadInputExc('Origin of type %s can not be saved! ' 'Allowed types are the following: %s' % (origin_type, allowed_origin_types)) _validate_url = URLValidator(schemes=['http', 'https', 'svn', 'git']) def _check_origin_url_valid(origin_url): try: _validate_url(origin_url) except ValidationError: raise BadInputExc('The provided origin url (%s) is not valid!'
% origin_url) def _get_visit_date_for_save_request(save_request): visit_date = None try: origin = {'type': save_request.origin_type, 'url': save_request.origin_url} origin_info = service.lookup_origin(origin) origin_visits = get_origin_visits(origin_info) visit_dates = [parse_timestamp(v['date']) for v in origin_visits] i = bisect_right(visit_dates, save_request.request_date) if i != len(visit_dates): visit_date = visit_dates[i] except Exception: pass return visit_date def _save_request_dict(save_request, task=None): must_save = False visit_date = save_request.visit_date if task: save_task_status = _save_task_status[task['status']] if save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEED) \ and not visit_date: visit_date = _get_visit_date_for_save_request(save_request) save_request.visit_date = visit_date must_save = True # Ensure the last origin visit is available in the database # before reporting the task execution as successful if save_task_status == SAVE_TASK_SUCCEED and not visit_date: save_task_status = SAVE_TASK_SCHEDULED if save_request.loading_task_status != save_task_status: save_request.loading_task_status = save_task_status must_save = True if must_save: save_request.save() else: save_task_status = save_request.loading_task_status return {'origin_type': save_request.origin_type, 'origin_url': save_request.origin_url, 'save_request_date': save_request.request_date.isoformat(), 'save_request_status': save_request.status, 'save_task_status': save_task_status, 'visit_date': visit_date.isoformat() if visit_date else None} def create_save_origin_request(origin_type, origin_url): """ Create a loading task to save a software origin into the archive. This function aims to create a software origin loading task through the use of the swh-scheduler component. First, some checks are performed to see if the origin type and url are valid, but also if the save request can be accepted. If those checks pass, the loading task is then created. Otherwise, the save request is put in pending or rejected state. All the submitted save requests are logged into the swh-web database to keep track of them.
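The bisect_right call in _get_visit_date_for_save_request deserves a second look: it returns the insertion point after any equal dates, i.e. the first visit strictly later than the request. In isolation, with illustrative dates::

    from bisect import bisect_right
    from datetime import datetime

    visit_dates = [datetime(2018, 1, 1), datetime(2018, 3, 1)]
    request_date = datetime(2018, 2, 1)

    i = bisect_right(visit_dates, request_date)
    # first visit after the request, or None if none happened yet
    visit_date = visit_dates[i] if i != len(visit_dates) else None
    # -> datetime(2018, 3, 1)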
Args: origin_type (str): the type of origin to save (currently only ``git`` but ``svn`` and ``hg`` will soon be available) origin_url (str): the url of the origin to save Raises: BadInputExc: the origin type or url is invalid ForbiddenExc: the provided origin url is blacklisted Returns: dict: A dict describing the save request with the following keys: * **origin_type**: the type of the origin to save * **origin_url**: the url of the origin * **save_request_date**: the date the request was submitted * **save_request_status**: the request status, either **accepted**, **rejected** or **pending** * **save_task_status**: the origin loading task status, either **not created**, **not yet scheduled**, **scheduled**, **succeed** or **failed** """ _check_origin_type_savable(origin_type) _check_origin_url_valid(origin_url) save_request_status = can_save_origin(origin_url) task = None # if the origin save request is accepted, create a scheduler # task to load it into the archive if save_request_status == SAVE_REQUEST_ACCEPTED: # create a task with high priority kwargs = {'priority': 'high'} # set task parameters according to the origin type if origin_type == 'git': kwargs['repo_url'] = origin_url elif origin_type == 'hg': kwargs['origin_url'] = origin_url elif origin_type == 'svn': kwargs['origin_url'] = origin_url kwargs['svn_url'] = origin_url sor = None # get the list of previously submitted save requests current_sors = \ list(SaveOriginRequest.objects.filter(origin_type=origin_type, origin_url=origin_url)) can_create_task = False # if no save requests were previously submitted, create the scheduler task if not current_sors: can_create_task = True else: # get the latest submitted save request sor = current_sors[0] # if it was in pending state, we need to create the scheduler task # and update the save request info in the database if sor.status == SAVE_REQUEST_PENDING: can_create_task = True # a task has already been created to load the origin elif sor.loading_task_id != -1: # get the scheduler task and its status tasks = scheduler.get_tasks([sor.loading_task_id]) task = tasks[0] if tasks else None task_status = _save_request_dict(sor, task)['save_task_status'] # create a new scheduler task only if the previous one has been # already executed if task_status == SAVE_TASK_FAILED or \ task_status == SAVE_TASK_SUCCEED: can_create_task = True sor = None else: can_create_task = False if can_create_task: # effectively create the scheduler task task_dict = create_oneshot_task_dict( _origin_type_task[origin_type], **kwargs) task = scheduler.create_tasks([task_dict])[0] # pending save request has been accepted if sor: sor.status = SAVE_REQUEST_ACCEPTED sor.loading_task_id = task['id'] sor.save() else: sor = SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status, # noqa loading_task_id=task['id']) # noqa # save request must be manually reviewed for acceptance elif save_request_status == SAVE_REQUEST_PENDING: # check if such a save request has already been submitted, # no need to add it to the database in that case try: sor = SaveOriginRequest.objects.get(origin_type=origin_type, origin_url=origin_url, status=save_request_status) # if not, add it to the database except ObjectDoesNotExist: sor = SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status) # origin cannot be saved as its url is blacklisted, # log the request to the database anyway else: sor =
SaveOriginRequest.objects.create(origin_type=origin_type, origin_url=origin_url, status=save_request_status) if save_request_status == SAVE_REQUEST_REJECTED: raise ForbiddenExc('The origin url is blacklisted and will not be ' 'loaded into the archive.') return _save_request_dict(sor, task) def get_save_origin_requests_from_queryset(requests_queryset): """ Get all save requests from a SaveOriginRequest queryset. Args: requests_queryset (django.db.models.QuerySet): input SaveOriginRequest queryset Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ task_ids = [] for sor in requests_queryset: task_ids.append(sor.loading_task_id) tasks = scheduler.get_tasks(task_ids) tasks = {task['id']: task for task in tasks} requests = [] for sor in requests_queryset: sr_dict = _save_request_dict(sor, tasks.get(sor.loading_task_id, None)) requests.append(sr_dict) return requests def get_save_origin_requests(origin_type, origin_url): """ Get all save requests for a given software origin. Args: origin_type (str): the type of the origin origin_url (str): the url of the origin Raises: BadInputExc: the origin type or url is invalid Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ _check_origin_type_savable(origin_type) _check_origin_url_valid(origin_url) sors = SaveOriginRequest.objects.filter(origin_type=origin_type, origin_url=origin_url) return get_save_origin_requests_from_queryset(sors) diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py new file mode 100644 index 00000000..00d605d3 --- /dev/null +++ b/swh/web/common/origin_visits.py @@ -0,0 +1,186 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import math + +from django.core.cache import cache + +from swh.web.common.exc import NotFoundExc +from swh.web.common.utils import parse_timestamp + + +def get_origin_visits(origin_info): + """Function that returns the list of visits for a swh origin. + That list is put in cache in order to speed up the navigation + in the swh web browse ui.
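The caching mentioned here is invalidated by comparing snapshots, as the function body below implements. Restated on its own, the freshness rule is::

    def cached_visits_are_fresh(cache_entry, last_snapshot):
        # Reuse the cached visit list only when no newer snapshot exists,
        # i.e. the origin's latest snapshot is still the one recorded on
        # the last cached visit.
        if not cache_entry:
            return False
        return (not last_snapshot
                or last_snapshot['id'] == cache_entry[-1]['snapshot'])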
+ + Args: + origin_info (dict): a dict describing the swh origin to fetch + visits from + + Returns: + list: A list of dict describing the origin visits with the + following keys: + + * **date**: UTC visit date in ISO format, + * **origin**: the origin id + * **status**: the visit status, either **full**, **partial** + or **ongoing** + * **visit**: the visit id + + Raises: + NotFoundExc: if the origin is not found + """ + + from swh.web.common import service + + cache_entry_id = 'origin_%s_visits' % origin_info['id'] + cache_entry = cache.get(cache_entry_id) + + last_snapshot = service.lookup_latest_origin_snapshot(origin_info['id']) + + if cache_entry and \ + (not last_snapshot or + last_snapshot['id'] == cache_entry[-1]['snapshot']): + return cache_entry + + origin_visits = [] + + per_page = service.MAX_LIMIT + last_visit = None + while 1: + visits = list(service.lookup_origin_visits(origin_info['id'], + last_visit=last_visit, + per_page=per_page)) + origin_visits += visits + if len(visits) < per_page: + break + else: + if not last_visit: + last_visit = per_page + else: + last_visit += per_page + + def _visit_sort_key(visit): + ts = parse_timestamp(visit['date']).timestamp() + return ts + (float(visit['visit']) / 10e3) + + for v in origin_visits: + if 'metadata' in v: + del v['metadata'] + origin_visits = [dict(t) for t in set([tuple(d.items()) + for d in origin_visits])] + origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) + + cache.set(cache_entry_id, origin_visits) + + return origin_visits + + +def get_origin_visit(origin_info, visit_ts=None, visit_id=None, + snapshot_id=None): + """Function that returns information about a visit for + a given origin. + If a timestamp is provided, the closest visit to that + timestamp is selected. + + Args: + origin_info (dict): a dict filled with origin information + (id, url, type) + visit_ts (int or str): an ISO date string or Unix timestamp to parse + visit_id (int): optional visit id for disambiguation in case + of several visits with the same timestamp + snapshot_id (str): optional snapshot id; when provided, the visit + pointing to that snapshot is selected + + Returns: + A dict containing the visit info as described below:: + + {'origin': 2, + 'date': '2017-10-08T11:54:25.582463+00:00', + 'metadata': {}, + 'visit': 25, + 'status': 'full'} + + """ + visits = get_origin_visits(origin_info) + + if not visits: + if 'type' in origin_info and 'url' in origin_info: + message = ('No visit associated to origin with' + ' type %s and url %s!' % (origin_info['type'], + origin_info['url'])) + else: + message = ('No visit associated to origin with' + ' id %s!' % origin_info['id']) + raise NotFoundExc(message) + + if snapshot_id: + visit = [v for v in visits if v['snapshot'] == snapshot_id] + if len(visit) == 0: + if 'type' in origin_info and 'url' in origin_info: + message = ('Visit for snapshot with id %s for origin with type' + ' %s and url %s not found!' % + (snapshot_id, origin_info['type'], + origin_info['url'])) + else: + message = ('Visit for snapshot with id %s for origin with' + ' id %s not found!' % + (snapshot_id, origin_info['id'])) + raise NotFoundExc(message) + return visit[0] + + if visit_id: + visit = [v for v in visits if v['visit'] == int(visit_id)] + if len(visit) == 0: + if 'type' in origin_info and 'url' in origin_info: + message = ('Visit with id %s for origin with type %s' + ' and url %s not found!' % + (visit_id, origin_info['type'], origin_info['url'])) + else: + message = ('Visit with id %s for origin with id %s' + ' not found!'
% (visit_id, origin_info['id'])) + raise NotFoundExc(message) + return visit[0] + + if not visit_ts: + # returns the latest full visit when no timestamp is provided + for v in reversed(visits): + if v['status'] == 'full': + return v + return visits[-1] + + parsed_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) + + visit_idx = None + for i, visit in enumerate(visits): + ts = math.floor(parse_timestamp(visit['date']).timestamp()) + if i == 0 and parsed_visit_ts <= ts: + return visit + elif i == len(visits) - 1: + if parsed_visit_ts >= ts: + return visit + else: + next_ts = math.floor( + parse_timestamp(visits[i+1]['date']).timestamp()) + if parsed_visit_ts >= ts and parsed_visit_ts < next_ts: + if (parsed_visit_ts - ts) < (next_ts - parsed_visit_ts): + visit_idx = i + break + else: + visit_idx = i+1 + break + + if visit_idx is not None: + visit = visits[visit_idx] + while visit_idx < len(visits) - 1 and \ + visit['date'] == visits[visit_idx+1]['date']: + visit_idx = visit_idx + 1 + visit = visits[visit_idx] + return visit + else: + if 'type' in origin_info and 'url' in origin_info: + message = ('Visit with timestamp %s for origin with type %s ' + 'and url %s not found!' % + (visit_ts, origin_info['type'], origin_info['url'])) + else: + message = ('Visit with timestamp %s for origin with id %s ' + 'not found!' % (visit_ts, origin_info['id'])) + raise NotFoundExc(message) diff --git a/swh/web/common/service.py b/swh/web/common/service.py index d77ba4dc..c80bc207 100644 --- a/swh/web/common/service.py +++ b/swh/web/common/service.py @@ -1,1020 +1,1036 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os from collections import defaultdict from swh.model import hashutil from swh.storage.algos import revisions_walker from swh.web.common import converters from swh.web.common import query from swh.web.common.exc import NotFoundExc +from swh.web.common.origin_visits import get_origin_visit from swh.web import config storage = config.storage() vault = config.vault() idx_storage = config.indexer_storage() MAX_LIMIT = 50 # Top limit the users can ask for def _first_element(l): """Returns the first element in the provided list or None if it is empty or None""" return next(iter(l or []), None) def lookup_multiple_hashes(hashes): """Lookup the passed hashes in a single DB connection, using batch processing. Args: An array of {filename: X, sha1: Y}, string X, hex sha1 string Y. Returns: The same array with elements updated with elem['found'] = true if the hash is present in storage, elem['found'] = false if not. """ hashlist = [hashutil.hash_to_bytes(elem['sha1']) for elem in hashes] content_missing = storage.content_missing_per_sha1(hashlist) missing = [hashutil.hash_to_hex(x) for x in content_missing] for x in hashes: x.update({'found': True}) for h in hashes: if h['sha1'] in missing: h['found'] = False return hashes def lookup_expression(expression, last_sha1, per_page): """Lookup expression in raw content. 
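Returning to get_origin_visit in the new origin_visits module above: once the early-return cases (snapshot_id, visit_id, no timestamp) are handled, the scan reduces to a nearest-neighbour search over visit timestamps. A simplified equivalent over a sorted list of epoch timestamps (a sketch only; the original's tie-breaking towards the later visit and its fast-forward over equal dates are approximated here)::

    def closest_visit_index(target_ts, visit_timestamps):
        # visit_timestamps must be sorted in ascending order
        if target_ts <= visit_timestamps[0]:
            return 0
        if target_ts >= visit_timestamps[-1]:
            return len(visit_timestamps) - 1
        return min(range(len(visit_timestamps)),
                   key=lambda i: abs(visit_timestamps[i] - target_ts))

    # closest_visit_index(16, [0, 10, 20, 40]) -> 2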
Args: expression (str): An expression to lookup through raw indexed content last_sha1 (str): Last sha1 seen per_page (int): Number of results per page Returns: List of ctags whose content match the expression """ limit = min(per_page, MAX_LIMIT) ctags = idx_storage.content_ctags_search(expression, last_sha1=last_sha1, limit=limit) for ctag in ctags: ctag = converters.from_swh(ctag, hashess={'id'}) ctag['sha1'] = ctag['id'] ctag.pop('id') yield ctag def lookup_hash(q): """Checks if the storage contains a given content checksum Args: query string of the form Returns: Dict with key found containing the hash info if the hash is present, None if not. """ algo, hash = query.parse_hash(q) found = storage.content_find({algo: hash}) return {'found': converters.from_content(found), 'algo': algo} def search_hash(q): """Checks if the storage contains a given content checksum Args: query string of the form Returns: Dict with key found to True or False, according to whether the checksum is present or not """ algo, hash = query.parse_hash(q) found = storage.content_find({algo: hash}) return {'found': found is not None} def _lookup_content_sha1(q): """Given a possible input, query for the content's sha1. Args: q: query string of the form Returns: binary sha1 if found or None """ algo, hash = query.parse_hash(q) if algo != 'sha1': hashes = storage.content_find({algo: hash}) if not hashes: return None return hashes['sha1'] return hash def lookup_content_ctags(q): """Return ctags information from a specified content. Args: q: query string of the form Yields: ctags information (dict) list if the content is found. """ sha1 = _lookup_content_sha1(q) if not sha1: return None ctags = list(idx_storage.content_ctags_get([sha1])) if not ctags: return None for ctag in ctags: yield converters.from_swh(ctag, hashess={'id'}) def lookup_content_filetype(q): """Return filetype information from a specified content. Args: q: query string of the form Yields: filetype information (dict) list if the content is found. """ sha1 = _lookup_content_sha1(q) if not sha1: return None filetype = _first_element(list(idx_storage.content_mimetype_get([sha1]))) if not filetype: return None return converters.from_filetype(filetype) def lookup_content_language(q): """Return language information from a specified content. Args: q: query string of the form Yields: language information (dict) list if the content is found. """ sha1 = _lookup_content_sha1(q) if not sha1: return None lang = _first_element(list(idx_storage.content_language_get([sha1]))) if not lang: return None return converters.from_swh(lang, hashess={'id'}) def lookup_content_license(q): """Return license information from a specified content. Args: q: query string of the form Yields: license information (dict) list if the content is found. """ sha1 = _lookup_content_sha1(q) if not sha1: return None lic = _first_element(idx_storage.content_fossology_license_get([sha1])) if not lic: return None return converters.from_swh({'id': sha1, 'facts': lic[sha1]}, hashess={'id'}) def lookup_origin(origin): """Return information about the origin matching dict origin. Args: origin: origin's dict with keys either 'id' or ('type' AND 'url') Returns: origin information as dict. """ origin_info = storage.origin_get(origin) if not origin_info: if 'id' in origin and origin['id']: msg = 'Origin with id %s not found!' % origin['id'] else: msg = 'Origin with type %s and url %s not found!' 
% \ (origin['type'], origin['url']) raise NotFoundExc(msg) return converters.from_origin(origin_info) def search_origin(url_pattern, offset=0, limit=50, regexp=False, with_visit=False): """Search for origins whose urls contain a provided string pattern or match a provided regular expression. Args: url_pattern: the string pattern to search for in origin urls offset: number of found origins to skip before returning results limit: the maximum number of found origins to return Returns: list of origin information as dict. """ origins = storage.origin_search(url_pattern, offset, limit, regexp, with_visit) return map(converters.from_origin, origins) def search_origin_metadata(fulltext, limit=50): """Search for origins whose metadata match a provided string pattern. Args: fulltext: the string pattern to search for in origin metadata offset: number of found origins to skip before returning results limit: the maximum number of found origins to return Returns: list of origin metadata as dict. """ results = idx_storage.origin_intrinsic_metadata_search_fulltext( conjunction=[fulltext], limit=limit) for result in results: result['from_revision'] = hashutil.hash_to_hex(result['from_revision']) return results def lookup_person(person_id): """Return information about the person with id person_id. Args: person_id as string Returns: person information as dict. Raises: NotFoundExc if there is no person with the provided id. """ person = _first_element(storage.person_get([person_id])) if not person: raise NotFoundExc('Person with id %s not found' % person_id) return converters.from_person(person) def _to_sha1_bin(sha1_hex): _, sha1_git_bin = query.parse_hash_with_algorithms_or_throws( sha1_hex, ['sha1'], # HACK: sha1_git really 'Only sha1_git is supported.') return sha1_git_bin def _check_directory_exists(sha1_git, sha1_git_bin): if len(list(storage.directory_missing([sha1_git_bin]))): raise NotFoundExc('Directory with sha1_git %s not found' % sha1_git) def lookup_directory(sha1_git): """Return information about the directory with id sha1_git. Args: sha1_git as string Returns: directory information as dict. """ empty_dir_sha1 = '4b825dc642cb6eb9a060e54bf8d69288fbee4904' if sha1_git == empty_dir_sha1: return [] sha1_git_bin = _to_sha1_bin(sha1_git) _check_directory_exists(sha1_git, sha1_git_bin) directory_entries = storage.directory_ls(sha1_git_bin) return map(converters.from_directory_entry, directory_entries) def lookup_directory_with_path(sha1_git, path_string): """Return directory information for entry with path path_string w.r.t. root directory pointed by directory_sha1_git Args: - directory_sha1_git: sha1_git corresponding to the directory to which we append paths to (hopefully) find the entry - the relative path to the entry starting from the directory pointed by directory_sha1_git Raises: NotFoundExc if the directory entry is not found """ sha1_git_bin = _to_sha1_bin(sha1_git) _check_directory_exists(sha1_git, sha1_git_bin) paths = path_string.strip(os.path.sep).split(os.path.sep) queried_dir = storage.directory_entry_get_by_path( sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths))) if not queried_dir: raise NotFoundExc(('Directory entry with path %s from %s not found') % (path_string, sha1_git)) return converters.from_directory_entry(queried_dir) def lookup_release(release_sha1_git): """Return information about the release with sha1 release_sha1_git. Args: release_sha1_git: The release's sha1 as hexadecimal Returns: Release information as dict. 
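A small aside on lookup_directory a few functions up: the hard-coded empty_dir_sha1 is git's well-known identifier of the empty tree, which the function special-cases rather than hitting storage. It can be recomputed directly::

    import hashlib

    # sha1 over the git object header of an empty tree ('tree 0\0')
    empty_dir_sha1 = hashlib.sha1(b'tree 0\x00').hexdigest()
    # -> '4b825dc642cb6eb9a060e54bf8d69288fbee4904'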
Raises: ValueError if the identifier provided is not of sha1 nature. """ sha1_git_bin = _to_sha1_bin(release_sha1_git) release = _first_element(storage.release_get([sha1_git_bin])) if not release: raise NotFoundExc('Release with sha1_git %s not found.' % release_sha1_git) return converters.from_release(release) def lookup_release_multiple(sha1_git_list): """Return information about the revisions identified with their sha1_git identifiers. Args: sha1_git_list: A list of revision sha1_git identifiers Returns: Release information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. """ sha1_bin_list = (_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list) releases = storage.release_get(sha1_bin_list) or [] return (converters.from_release(r) for r in releases) def lookup_revision(rev_sha1_git): """Return information about the revision with sha1 revision_sha1_git. Args: revision_sha1_git: The revision's sha1 as hexadecimal Returns: Revision information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. NotFoundExc if there is no revision with the provided sha1_git. """ sha1_git_bin = _to_sha1_bin(rev_sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: raise NotFoundExc('Revision with sha1_git %s not found.' % rev_sha1_git) return converters.from_revision(revision) def lookup_revision_multiple(sha1_git_list): """Return information about the revisions identified with their sha1_git identifiers. Args: sha1_git_list: A list of revision sha1_git identifiers Returns: Generator of revisions information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. """ sha1_bin_list = (_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list) revisions = storage.revision_get(sha1_bin_list) or [] return (converters.from_revision(r) for r in revisions) def lookup_revision_message(rev_sha1_git): """Return the raw message of the revision with sha1 revision_sha1_git. Args: revision_sha1_git: The revision's sha1 as hexadecimal Returns: Decoded revision message as dict {'message': } Raises: ValueError if the identifier provided is not of sha1 nature. NotFoundExc if the revision is not found, or if it has no message """ sha1_git_bin = _to_sha1_bin(rev_sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: raise NotFoundExc('Revision with sha1_git %s not found.' % rev_sha1_git) if 'message' not in revision: raise NotFoundExc('No message for revision with sha1_git %s.' % rev_sha1_git) res = {'message': revision['message']} return res +def _lookup_revision_id_by(origin_id, branch_name, timestamp): + def _get_snapshot_branch(snapshot, branch_name): + snapshot = lookup_snapshot(visit['snapshot'], + branches_from=branch_name, + branches_count=10) + branch = None + if branch_name in snapshot['branches']: + branch = snapshot['branches'][branch_name] + return branch + + visit = get_origin_visit({'id': origin_id}, visit_ts=timestamp) + branch = _get_snapshot_branch(visit['snapshot'], branch_name) + rev_id = None + if branch and branch['target_type'] == 'revision': + rev_id = branch['target'] + elif branch and branch['target_type'] == 'alias': + branch = _get_snapshot_branch(visit['snapshot'], branch['target']) + if branch and branch['target_type'] == 'revision': + rev_id = branch['target'] + + if not rev_id: + raise NotFoundExc('Revision for origin %s and branch %s not found.' 
+ % (origin_id, branch_name)) + + return rev_id + + def lookup_revision_by(origin_id, - branch_name="refs/heads/master", + branch_name='HEAD', timestamp=None): - """Lookup revisions by origin_id, branch_name and timestamp. + """Lookup revision by origin id, snapshot branch name and visit timestamp. - If: - - branch_name is not provided, lookup using 'refs/heads/master' as default. - - ts is not provided, use the most recent + If branch_name is not provided, lookup using 'HEAD' as default. + If timestamp is not provided, use the most recent. Args: - - origin_id: origin of the revision. - - branch_name: revision's branch. - - timestamp: revision's time frame. + origin_id (int): origin of the revision + branch_name (str): snapshot branch name + timestamp (str/int): origin visit time frame - Yields: - The revisions matching the criterions. + Returns: + dict: The revision matching the criterions Raises: NotFoundExc if no revision corresponds to the criterion """ - res = _first_element(storage.revision_get_by(origin_id, - branch_name, - timestamp=timestamp, - limit=1)) - if not res: - raise NotFoundExc('Revision for origin %s and branch %s not found.' - % (origin_id, branch_name)) - return converters.from_revision(res) + rev_id = _lookup_revision_id_by(origin_id, branch_name, timestamp) + return lookup_revision(rev_id) def lookup_revision_log(rev_sha1_git, limit): - """Return information about the revision with sha1 revision_sha1_git. + """Lookup revision log by revision id. Args: - revision_sha1_git: The revision's sha1 as hexadecimal - limit: the maximum number of revisions returned + rev_sha1_git (str): The revision's sha1 as hexadecimal + limit (int): the maximum number of revisions returned Returns: - Revision information as dict. + list: Revision log as list of revision dicts Raises: - ValueError if the identifier provided is not of sha1 nature. - NotFoundExc if there is no revision with the provided sha1_git. + ValueError: if the identifier provided is not of sha1 nature. + NotFoundExc: if there is no revision with the provided sha1_git. """ sha1_git_bin = _to_sha1_bin(rev_sha1_git) revision_entries = storage.revision_log([sha1_git_bin], limit) if not revision_entries: raise NotFoundExc('Revision with sha1_git %s not found.' % rev_sha1_git) return map(converters.from_revision, revision_entries) def lookup_revision_log_by(origin_id, branch_name, timestamp, limit): - """Return information about the revision with sha1 revision_sha1_git. + """Lookup revision by origin id, snapshot branch name and visit timestamp. Args: - origin_id: origin of the revision - branch_name: revision's branch - timestamp: revision's time frame - limit: the maximum number of revisions returned + origin_id (int): origin of the revision + branch_name (str): snapshot branch + timestamp (str/int): origin visit time frame + limit (int): the maximum number of revisions returned Returns: - Revision information as dict. 
+ list: Revision log as list of revision dicts Raises: - NotFoundExc if no revision corresponds to the criterion + NotFoundExc: if no revision corresponds to the criterion """ - revision_entries = storage.revision_log_by(origin_id, - branch_name, - timestamp, - limit=limit) - if not revision_entries: - return None - return map(converters.from_revision, revision_entries) + rev_id = _lookup_revision_id_by(origin_id, branch_name, timestamp) + return lookup_revision_log(rev_id, limit) def lookup_revision_with_context_by(origin_id, branch_name, ts, sha1_git, limit=100): """Return information about revision sha1_git, limited to the sub-graph of all transitive parents of sha1_git_root. sha1_git_root being resolved through the lookup of a revision by origin_id, branch_name and ts. In other words, sha1_git is an ancestor of sha1_git_root. Args: - origin_id: origin of the revision. - branch_name: revision's branch. - timestamp: revision's time frame. - sha1_git: one of sha1_git_root's ancestors. - limit: limit the lookup to 100 revisions back. Returns: Pair of (root_revision, revision). Information on sha1_git if it is an ancestor of sha1_git_root including children leading to sha1_git_root Raises: - BadInputExc in case of unknown algo_hash or bad hash. - NotFoundExc if either revision is not found or if sha1_git is not an ancestor of sha1_git_root. """ rev_root = _first_element(storage.revision_get_by(origin_id, branch_name, timestamp=ts, limit=1)) if not rev_root: raise NotFoundExc('Revision with (origin_id: %s, branch_name: %s' ', ts: %s) not found.' % (origin_id, branch_name, ts)) return (converters.from_revision(rev_root), lookup_revision_with_context(rev_root, sha1_git, limit)) def lookup_revision_with_context(sha1_git_root, sha1_git, limit=100): """Return information about revision sha1_git, limited to the sub-graph of all transitive parents of sha1_git_root. In other words, sha1_git is an ancestor of sha1_git_root. Args: sha1_git_root: latest revision. The type is either a sha1 (as an hex string) or a non converted dict. sha1_git: one of sha1_git_root's ancestors limit: limit the lookup to 100 revisions back Returns: Information on sha1_git if it is an ancestor of sha1_git_root including children leading to sha1_git_root Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if either revision is not found or if sha1_git is not an ancestor of sha1_git_root """ sha1_git_bin = _to_sha1_bin(sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: raise NotFoundExc('Revision %s not found' % sha1_git) if isinstance(sha1_git_root, str): sha1_git_root_bin = _to_sha1_bin(sha1_git_root) revision_root = _first_element(storage.revision_get([sha1_git_root_bin])) # noqa if not revision_root: raise NotFoundExc('Revision root %s not found' % sha1_git_root) else: sha1_git_root_bin = sha1_git_root['id'] revision_log = storage.revision_log([sha1_git_root_bin], limit) parents = {} children = defaultdict(list) for rev in revision_log: rev_id = rev['id'] parents[rev_id] = [] for parent_id in rev['parents']: parents[rev_id].append(parent_id) children[parent_id].append(rev_id) if revision['id'] not in parents: raise NotFoundExc('Revision %s is not an ancestor of %s' % (sha1_git, sha1_git_root)) revision['children'] = children[revision['id']] return converters.from_revision(revision) def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False): """Return information on directory pointed by revision with sha1_git. 
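The _lookup_revision_id_by helper introduced in this hunk is its behavioural core: branch resolution now goes through the snapshot, following at most one level of alias indirection. The rule, condensed (a sketch; get_branch stands in for the snapshot lookup, and branch dicts carry target/target_type as elsewhere in this module)::

    def resolve_branch_to_revision(get_branch, branch_name):
        branch = get_branch(branch_name)
        # follow a single alias hop, e.g. HEAD -> refs/heads/master
        if branch and branch['target_type'] == 'alias':
            branch = get_branch(branch['target'])
        if branch and branch['target_type'] == 'revision':
            return branch['target']
        return None  # caller raises NotFoundExc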
If dir_path is not provided, display the top level directory. Otherwise, display the directory pointed to by dir_path (if it exists). Args: sha1_git: revision's hash. dir_path: optional directory pointed to by that revision. with_data: boolean indicating whether to retrieve the raw data if the path resolves to a content. Defaults to False (for the api) Returns: Information on the directory pointed to by that revision. Raises: BadInputExc in case of unknown algo_hash or bad hash. NotFoundExc either if the revision is not found or the path referenced does not exist. NotImplementedError in case dir_path exists but does not reference an entity of type 'dir' or 'file'. """ sha1_git_bin = _to_sha1_bin(sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: raise NotFoundExc('Revision %s not found' % sha1_git) dir_sha1_git_bin = revision['directory'] if dir_path: paths = dir_path.strip(os.path.sep).split(os.path.sep) entity = storage.directory_entry_get_by_path( dir_sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths))) if not entity: raise NotFoundExc( "Directory or File '%s' pointed to by revision %s not found" % (dir_path, sha1_git)) else: entity = {'type': 'dir', 'target': dir_sha1_git_bin} if entity['type'] == 'dir': directory_entries = storage.directory_ls(entity['target']) or [] return {'type': 'dir', 'path': '.' if not dir_path else dir_path, 'revision': sha1_git, 'content': map(converters.from_directory_entry, directory_entries)} elif entity['type'] == 'file': # content content = storage.content_find({'sha1_git': entity['target']}) if with_data: c = _first_element(storage.content_get([content['sha1']])) content['data'] = c['data'] return {'type': 'file', 'path': '.' if not dir_path else dir_path, 'revision': sha1_git, 'content': converters.from_content(content)} else: raise NotImplementedError('Entity of type %s not implemented.' % entity['type']) def lookup_content(q): """Lookup the content designated by q. Args: q: query string identifying the content Raises: NotFoundExc if the requested content is not found """ algo, hash = query.parse_hash(q) c = storage.content_find({algo: hash}) if not c: raise NotFoundExc('Content with %s checksum equals to %s not found!' % (algo, hashutil.hash_to_hex(hash))) return converters.from_content(c) def lookup_content_raw(q): """Lookup the content defined by q. Args: q: query string of the form Returns: dict with 'sha1' and 'data' keys. data representing its raw data decoded. Raises: NotFoundExc if the requested content is not found or if the content bytes are not available in the storage """ c = lookup_content(q) content_sha1_bytes = hashutil.hash_to_bytes(c['checksums']['sha1']) content = _first_element(storage.content_get([content_sha1_bytes])) if not content: algo, hash = query.parse_hash(q) raise NotFoundExc('Bytes of content with %s checksum equals to %s ' 'are not available!' % (algo, hashutil.hash_to_hex(hash))) return converters.from_content(content) def stat_counters(): """Return the stat counters for Software Heritage Returns: A dict mapping textual labels to integer values. """ return storage.stat_counters() def _lookup_origin_visits(origin_id, last_visit=None, limit=10): """Yields the given origin's visits.
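This visit-listing API is cursor-paginated (last_visit plus per_page/limit); get_origin_visits in the new origin_visits module drives it in a loop until a short page signals the end. A generic sketch of that consumption pattern (fetch_page stands in for lookup_origin_visits; note the real loop advances last_visit by per_page rather than reading the last visit id)::

    def iter_all_visits(fetch_page, origin_id, per_page=50):
        last_visit = None
        while True:
            page = list(fetch_page(origin_id, last_visit=last_visit,
                                   per_page=per_page))
            yield from page
            if len(page) < per_page:
                break  # short page: no more visits
            last_visit = page[-1]['visit']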
def _lookup_origin_visits(origin_id, last_visit=None, limit=10):
    """Yield the visits for the origin with id origin_id.

    Args:
        origin_id (int): origin to list visits for
        last_visit (int): last visit to lookup from
        limit (int): Number of elements max to display

    Yields:
        Dictionaries of origin_visit for that origin

    """
    limit = min(limit, MAX_LIMIT)
    yield from storage.origin_visit_get(
        origin_id, last_visit=last_visit, limit=limit)


def lookup_origin_visits(origin_id, last_visit=None, per_page=10):
    """Yield the visits for the origin with id origin_id.

    Args:
        origin_id: origin to list visits for
        last_visit: visit to start listing from
        per_page: maximum number of visits to return

    Yields:
        Dictionaries of origin_visit for that origin

    """
    visits = _lookup_origin_visits(origin_id, last_visit=last_visit,
                                   limit=per_page)
    for visit in visits:
        yield converters.from_origin_visit(visit)


def lookup_origin_visit(origin_id, visit_id):
    """Return information about visit visit_id with origin origin_id.

    Args:
        origin_id: origin concerned by the visit
        visit_id: the visit identifier to lookup

    Returns:
        The dict origin_visit concerned

    """
    visit = storage.origin_visit_get_by(origin_id, visit_id)
    if not visit:
        raise NotFoundExc('Origin with id %s or its visit '
                          'with id %s not found!' % (origin_id, visit_id))
    return converters.from_origin_visit(visit)


def lookup_snapshot_size(snapshot_id):
    """Count the number of branches in the snapshot with the given id

    Args:
        snapshot_id (str): sha1 identifier of the snapshot

    Returns:
        dict: A dict whose keys are the target types of branches and
        values their corresponding amount

    """
    snapshot_id_bin = _to_sha1_bin(snapshot_id)
    snapshot_size = storage.snapshot_count_branches(snapshot_id_bin)
    if 'revision' not in snapshot_size:
        snapshot_size['revision'] = 0
    if 'release' not in snapshot_size:
        snapshot_size['release'] = 0
    return snapshot_size


-def lookup_snapshot(snapshot_id, branches_from='', branches_count=None,
+def lookup_snapshot(snapshot_id, branches_from='', branches_count=1000,
                     target_types=None):
    """Return information about a snapshot, aka the list of named
    branches found during a specific visit of an origin.

    Args:
        snapshot_id (str): sha1 identifier of the snapshot
        branches_from (str): optional parameter used to skip branches
            whose name comes before it in lexicographic order
        branches_count (int): optional parameter used to restrain
            the amount of returned branches
        target_types (list): optional parameter used to filter the
            target types of branch to return (possible values that can be
            contained in that list are `'content', 'directory',
            'revision', 'release', 'snapshot', 'alias'`)

    Returns:
        A dict filled with the snapshot content.

    """
    snapshot_id_bin = _to_sha1_bin(snapshot_id)
    snapshot = storage.snapshot_get_branches(snapshot_id_bin,
                                             branches_from.encode(),
                                             branches_count, target_types)
    if not snapshot:
        raise NotFoundExc('Snapshot with id %s not found!' % snapshot_id)
    return converters.from_snapshot(snapshot)


def lookup_latest_origin_snapshot(origin_id, allowed_statuses=None):
    """Return information about the latest snapshot of an origin.

    .. warning:: At most 1000 branches contained in the snapshot
        will be returned for performance reasons.

    Args:
        origin_id: integer identifier of the origin
        allowed_statuses: list of visit statuses considered
            to find the latest snapshot for the visit. For instance,
            ``allowed_statuses=['full']`` will only consider visits
            that have successfully run to completion.

    Returns:
        A dict filled with the snapshot content.

    """
    snapshot = storage.snapshot_get_latest(origin_id, allowed_statuses)
    return converters.from_snapshot(snapshot)
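
# Editor's note: a sketch of how a caller can page through a very large
# snapshot with the branches_from cursor and branches_count limit that
# lookup_snapshot now exposes (default page of 1000). fetch_page stands in
# for the real service call, and the '\x00' suffix trick to resume strictly
# after the last returned name is an assumption of this example.
def iter_snapshot_branches(fetch_page, snapshot_id, page_size=1000):
    branches_from = ''
    while True:
        snp = fetch_page(snapshot_id, branches_from=branches_from,
                         branches_count=page_size)
        names = sorted(snp['branches'])
        for name in names:
            yield name, snp['branches'][name]
        if len(names) < page_size:
            break
        branches_from = names[-1] + '\x00'

# Usage with a fake single-page snapshot:
fake = {'branches': {'HEAD': {'target_type': 'revision', 'target': 'abc'}}}
assert len(list(iter_snapshot_branches(lambda *a, **kw: fake, 'snp-id'))) == 1
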
def lookup_revision_through(revision, limit=100):
    """Retrieve a revision from the criteria stored in the revision
    dictionary.

    Args:
        revision: Dictionary of criteria to lookup the revision with.
        Here are the supported combinations of possible values:
        - origin_id, branch_name, ts, sha1_git
        - origin_id, branch_name, ts
        - sha1_git_root, sha1_git
        - sha1_git

    Returns:
        None if the revision is not found or the actual revision.

    """
    if 'origin_id' in revision and \
       'branch_name' in revision and \
       'ts' in revision and \
       'sha1_git' in revision:
        return lookup_revision_with_context_by(revision['origin_id'],
                                               revision['branch_name'],
                                               revision['ts'],
                                               revision['sha1_git'],
                                               limit)
    if 'origin_id' in revision and \
       'branch_name' in revision and \
       'ts' in revision:
        return lookup_revision_by(revision['origin_id'],
                                  revision['branch_name'],
                                  revision['ts'])
    if 'sha1_git_root' in revision and \
       'sha1_git' in revision:
        return lookup_revision_with_context(revision['sha1_git_root'],
                                            revision['sha1_git'],
                                            limit)
    if 'sha1_git' in revision:
        return lookup_revision(revision['sha1_git'])

    # this should not happen
    raise NotImplementedError('Should not happen!')


def lookup_directory_through_revision(revision, path=None,
                                      limit=100, with_data=False):
    """Retrieve the directory information from the revision.

    Args:
        revision: dictionary of criteria representing a revision to lookup
        path: directory's path to lookup.
        limit: optional query parameter to limit the revisions log (default to
            100). For now, note that this limit could impede the transitivity
            conclusion about sha1_git not being an ancestor of the root
            revision.
        with_data: indicate to retrieve the content's raw data if path
            resolves to a content.

    Returns:
        The directory pointed to by the revision criteria at path.

    """
    rev = lookup_revision_through(revision, limit)

    if not rev:
        raise NotFoundExc('Revision with criterion %s not found!' % revision)
    return (rev['id'],
            lookup_directory_with_revision(rev['id'], path, with_data))


def vault_cook(obj_type, obj_id, email=None):
    """Cook a vault bundle.
    """
    return vault.cook(obj_type, obj_id, email=email)


def vault_fetch(obj_type, obj_id):
    """Fetch a vault bundle.
    """
    return vault.fetch(obj_type, obj_id)


def vault_progress(obj_type, obj_id):
    """Get the current progress of a vault bundle.
    """
    return vault.progress(obj_type, obj_id)


def diff_revision(rev_id):
    """Get the list of file changes (insertion / deletion / modification /
    renaming) for a particular revision.
    """
    rev_sha1_git_bin = _to_sha1_bin(rev_id)

    changes = storage.diff_revision(rev_sha1_git_bin, track_renaming=True)

    for change in changes:
        change['from'] = converters.from_directory_entry(change['from'])
        change['to'] = converters.from_directory_entry(change['to'])
        if change['from_path']:
            change['from_path'] = change['from_path'].decode('utf-8')
        if change['to_path']:
            change['to_path'] = change['to_path'].decode('utf-8')

    return changes


class _RevisionsWalkerProxy(object):
    """
    Proxy class wrapping a revisions walker iterator from
    swh-storage and performing needed conversions.
    """
    def __init__(self, rev_walker_type, rev_start, *args, **kwargs):
        rev_start_bin = hashutil.hash_to_bytes(rev_start)
        self.revisions_walker = \
            revisions_walker.get_revisions_walker(rev_walker_type,
                                                  storage,
                                                  rev_start_bin,
                                                  *args, **kwargs)

    def export_state(self):
        return self.revisions_walker.export_state()

    def __next__(self):
        return converters.from_revision(next(self.revisions_walker))

    def __iter__(self):
        return self


def get_revisions_walker(rev_walker_type, rev_start, *args, **kwargs):
    """
    Utility function to instantiate a revisions walker of a given type,
    see :mod:`swh.storage.algos.revisions_walker`.
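
# Editor's note: _RevisionsWalkerProxy above is a thin adapter around the
# swh-storage walker: iteration is delegated, and each yielded revision is
# converted on the way out. The generic shape of that adapter, with a plain
# list standing in for the storage-side walker:
class ConvertingIterator:
    def __init__(self, inner, convert):
        self._inner = iter(inner)
        self._convert = convert

    def __next__(self):
        return self._convert(next(self._inner))

    def __iter__(self):
        return self

raw_revs = [{'id': b'\x18\xd8'}]
assert list(ConvertingIterator(raw_revs, lambda r: {'id': r['id'].hex()})) \
    == [{'id': '18d8'}]
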
Args: rev_walker_type (str): the type of revisions walker to return, possible values are: ``committer_date``, ``dfs``, ``dfs_post``, ``bfs`` and ``path`` rev_start (str): hexadecimal representation of a revision identifier args (list): position arguments to pass to the revisions walker constructor kwargs (dict): keyword arguments to pass to the revisions walker constructor """ # first check if the provided revision is valid lookup_revision(rev_start) return _RevisionsWalkerProxy(rev_walker_type, rev_start, *args, **kwargs) diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py index a9336bfc..1522fcf3 100644 --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -1,413 +1,348 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import docutils.parsers.rst import docutils.utils import re import requests from datetime import datetime, timezone from dateutil import parser as date_parser from dateutil import tz -from django.core.cache import cache from django.urls import reverse as django_reverse from django.http import QueryDict from swh.model.exceptions import ValidationError from swh.model.identifiers import ( persistent_identifier, parse_persistent_identifier, CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT ) -from swh.web.common import service + from swh.web.common.exc import BadInputExc from swh.web.config import get_config swh_object_icons = { 'branch': 'fa fa-code-fork', 'branches': 'fa fa-code-fork', 'content': 'fa fa-file-text', 'directory': 'fa fa-folder', 'person': 'fa fa-user', 'revisions history': 'fa fa-history', 'release': 'fa fa-tag', 'releases': 'fa fa-tag', 'revision': 'octicon octicon-git-commit', 'snapshot': 'fa fa-camera', 'visits': 'fa fa-calendar', } def reverse(viewname, url_args=None, query_params=None, current_app=None, urlconf=None): """An override of django reverse function supporting query parameters. Args: viewname (str): the name of the django view from which to compute a url url_args (dict): dictionary of url arguments indexed by their names query_params (dict): dictionary of query parameters to append to the reversed url current_app (str): the name of the django app tighten to the view urlconf (str): url configuration module Returns: str: the url of the requested view with processed arguments and query parameters """ if url_args: url_args = {k: v for k, v in url_args.items() if v is not None} url = django_reverse(viewname, urlconf=urlconf, kwargs=url_args, current_app=current_app) if query_params: query_params = {k: v for k, v in query_params.items() if v} if query_params and len(query_params) > 0: query_dict = QueryDict('', mutable=True) for k in sorted(query_params.keys()): query_dict[k] = query_params[k] url += ('?' + query_dict.urlencode(safe='/;:')) return url def datetime_to_utc(date): """Returns datetime in UTC without timezone info Args: date (datetime.datetime): input datetime with timezone info Returns: datetime.datetime: datetime in UTC without timezone info """ if date.tzinfo: return date.astimezone(tz.gettz('UTC')).replace(tzinfo=timezone.utc) else: return date def parse_timestamp(timestamp): """Given a time or timestamp (as string), parse the result as UTC datetime. Returns: datetime.datetime: a timezone-aware datetime representing the parsed value or None if the parsing fails. 
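
# Editor's note: the reverse() override above appends query parameters in
# sorted key order, which keeps generated URLs stable and testable. A
# dependency-free sketch of just that query-string step (urllib replaces
# Django's QueryDict here, an assumption of this example):
from urllib.parse import urlencode

def append_query_params(url, query_params):
    params = {k: v for k, v in (query_params or {}).items() if v}
    if params:
        url += '?' + urlencode(sorted(params.items()), safe='/;:')
    return url

assert append_query_params('/api/1/origin/search/python/',
                           {'offset': None, 'limit': 2}) == \
    '/api/1/origin/search/python/?limit=2'
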
Samples: - 2016-01-12 - 2016-01-12T09:19:12+0100 - Today is January 1, 2047 at 8:21:00AM - 1452591542 """ if not timestamp: return None try: date = date_parser.parse(timestamp, ignoretz=False, fuzzy=True) return datetime_to_utc(date) except Exception: try: return datetime.utcfromtimestamp(float(timestamp)).replace( tzinfo=timezone.utc) except (ValueError, OverflowError) as e: raise BadInputExc(e) def shorten_path(path): """Shorten the given path: for each hash present, only return the first 8 characters followed by an ellipsis""" sha256_re = r'([0-9a-f]{8})[0-9a-z]{56}' sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}' ret = re.sub(sha256_re, r'\1...', path) return re.sub(sha1_re, r'\1...', ret) def format_utc_iso_date(iso_date, fmt='%d %B %Y, %H:%M UTC'): """Turns a string representation of an ISO 8601 date string to UTC and format it into a more human readable one. For instance, from the following input string: '2017-05-04T13:27:13+02:00' the following one is returned: '04 May 2017, 11:27 UTC'. Custom format string may also be provided as parameter Args: iso_date (str): a string representation of an ISO 8601 date fmt (str): optional date formatting string Returns: str: a formatted string representation of the input iso date """ if not iso_date: return iso_date date = parse_timestamp(iso_date) return date.strftime(fmt) def gen_path_info(path): """Function to generate path data navigation for use with a breadcrumb in the swh web ui. For instance, from a path /folder1/folder2/folder3, it returns the following list:: [{'name': 'folder1', 'path': 'folder1'}, {'name': 'folder2', 'path': 'folder1/folder2'}, {'name': 'folder3', 'path': 'folder1/folder2/folder3'}] Args: path: a filesystem path Returns: list: a list of path data for navigation as illustrated above. """ path_info = [] if path: sub_paths = path.strip('/').split('/') path_from_root = '' for p in sub_paths: path_from_root += '/' + p path_info.append({'name': p, 'path': path_from_root.strip('/')}) return path_info -def get_origin_visits(origin_info): - """Function that returns the list of visits for a swh origin. - That list is put in cache in order to speedup the navigation - in the swh web browse ui. 
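
# Editor's note: parse_timestamp above tries a fuzzy calendar-date parse
# first and only then falls back to interpreting the value as a Unix epoch.
# A compact sketch of that two-stage strategy using only the stdlib
# (dateutil's fuzzy parsing is approximated by one fixed format here):
from datetime import datetime, timezone

def parse_ts(value, fmt='%Y-%m-%d'):
    try:
        return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc)
    except ValueError:
        return datetime.fromtimestamp(float(value), tz=timezone.utc)

assert parse_ts('2016-01-12').year == 2016
assert parse_ts('1452591542').year == 2016
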
- - Args: - origin_id (int): the id of the swh origin to fetch visits from - - Returns: - list: A list of dict describing the origin visits with the - following keys: - - * **date**: UTC visit date in ISO format, - * **origin**: the origin id - * **status**: the visit status, either **full**, **partial** - or **ongoing** - * **visit**: the visit id - - Raises: - NotFoundExc: if the origin is not found - """ - cache_entry_id = 'origin_%s_visits' % origin_info['id'] - cache_entry = cache.get(cache_entry_id) - - last_snapshot = service.lookup_latest_origin_snapshot(origin_info['id']) - - if cache_entry and \ - (not last_snapshot or - last_snapshot['id'] == cache_entry[-1]['snapshot']): - return cache_entry - - origin_visits = [] - - per_page = service.MAX_LIMIT - last_visit = None - while 1: - visits = list(service.lookup_origin_visits(origin_info['id'], - last_visit=last_visit, - per_page=per_page)) - origin_visits += visits - if len(visits) < per_page: - break - else: - if not last_visit: - last_visit = per_page - else: - last_visit += per_page - - def _visit_sort_key(visit): - ts = parse_timestamp(visit['date']).timestamp() - return ts + (float(visit['visit']) / 10e3) - - for v in origin_visits: - if 'metadata' in v: - del v['metadata'] - origin_visits = [dict(t) for t in set([tuple(d.items()) - for d in origin_visits])] - origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) - - cache.set(cache_entry_id, origin_visits) - - return origin_visits - - def get_swh_persistent_id(object_type, object_id, scheme_version=1): """ Returns the persistent identifier for a swh object based on: * the object type * the object id * the swh identifiers scheme version Args: object_type (str): the swh object type (content/directory/release/revision/snapshot) object_id (str): the swh object id (hexadecimal representation of its hash value) scheme_version (int): the scheme version of the swh persistent identifiers Returns: str: the swh object persistent identifier Raises: BadInputExc: if the provided parameters do not enable to generate a valid identifier """ try: swh_id = persistent_identifier(object_type, object_id, scheme_version) except ValidationError as e: raise BadInputExc('Invalid object (%s) for swh persistent id. %s' % (object_id, e)) else: return swh_id def resolve_swh_persistent_id(swh_id, query_params=None): """ Try to resolve a Software Heritage persistent id into an url for browsing the pointed object. 
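
# Editor's note: an illustrative sketch of the identifier scheme that
# get_swh_persistent_id wraps: 'swh:<scheme version>:<object code>:<hex id>'.
# The code table is hard-coded here for the example only; the real
# implementation delegates to swh.model.identifiers.persistent_identifier.
_PID_CODES = {'content': 'cnt', 'directory': 'dir', 'release': 'rel',
              'revision': 'rev', 'snapshot': 'snp'}

def build_swh_pid(object_type, object_id, scheme_version=1):
    if object_type not in _PID_CODES:
        raise ValueError('unknown object type: %s' % object_type)
    return 'swh:%d:%s:%s' % (scheme_version, _PID_CODES[object_type],
                             object_id)

assert build_swh_pid('revision', '18d8be353ed3480476f032475e7c233eff7371d5') \
    == 'swh:1:rev:18d8be353ed3480476f032475e7c233eff7371d5'
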
Args: swh_id (str): a Software Heritage persistent identifier query_params (django.http.QueryDict): optional dict filled with query parameters to append to the browse url Returns: dict: a dict with the following keys: * **swh_id_parsed (swh.model.identifiers.PersistentId)**: the parsed identifier * **browse_url (str)**: the url for browsing the pointed object Raises: BadInputExc: if the provided identifier can not be parsed """ # noqa try: swh_id_parsed = parse_persistent_identifier(swh_id) object_type = swh_id_parsed.object_type object_id = swh_id_parsed.object_id browse_url = None query_dict = QueryDict('', mutable=True) if query_params and len(query_params) > 0: for k in sorted(query_params.keys()): query_dict[k] = query_params[k] if 'origin' in swh_id_parsed.metadata: query_dict['origin'] = swh_id_parsed.metadata['origin'] if object_type == CONTENT: query_string = 'sha1_git:' + object_id fragment = '' if 'lines' in swh_id_parsed.metadata: lines = swh_id_parsed.metadata['lines'].split('-') fragment += '#L' + lines[0] if len(lines) > 1: fragment += '-L' + lines[1] browse_url = reverse('browse-content', url_args={'query_string': query_string}, query_params=query_dict) + fragment elif object_type == DIRECTORY: browse_url = reverse('browse-directory', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == RELEASE: browse_url = reverse('browse-release', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == REVISION: browse_url = reverse('browse-revision', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == SNAPSHOT: browse_url = reverse('browse-snapshot', url_args={'snapshot_id': object_id}, query_params=query_dict) except ValidationError as ve: raise BadInputExc('Error when parsing identifier. %s' % ' '.join(ve.messages)) else: return {'swh_id_parsed': swh_id_parsed, 'browse_url': browse_url} def parse_rst(text, report_level=2): """ Parse a reStructuredText string with docutils. Args: text (str): string with reStructuredText markups in it report_level (int): level of docutils report messages to print (1 info 2 warning 3 error 4 severe 5 none) Returns: docutils.nodes.document: a parsed docutils document """ parser = docutils.parsers.rst.Parser() components = (docutils.parsers.rst.Parser,) settings = docutils.frontend.OptionParser( components=components).get_default_values() settings.report_level = report_level document = docutils.utils.new_document('rst-doc', settings=settings) parser.parse(text, document) return document def get_client_ip(request): """ Return the client IP address from an incoming HTTP request. Args: request (django.http.HttpRequest): the incoming HTTP request Returns: str: The client IP address """ x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') if x_forwarded_for: ip = x_forwarded_for.split(',')[0] else: ip = request.META.get('REMOTE_ADDR') return ip def is_recaptcha_valid(request, recaptcha_response): """ Verify if the response for Google reCAPTCHA is valid. 
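
# Editor's note: a compact restatement of the proxy-aware lookup that
# get_client_ip performs above; the .strip() is an addition of this sketch,
# the original takes the first comma-separated entry as-is.
def client_ip(meta):
    xff = meta.get('HTTP_X_FORWARDED_FOR')
    return xff.split(',')[0].strip() if xff else meta.get('REMOTE_ADDR')

assert client_ip({'HTTP_X_FORWARDED_FOR': '203.0.113.7, 10.0.0.1'}) == \
    '203.0.113.7'
assert client_ip({'REMOTE_ADDR': '198.51.100.4'}) == '198.51.100.4'
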
Args: request (django.http.HttpRequest): the incoming HTTP request recaptcha_response (str): the reCAPTCHA response Returns: bool: Whether the reCAPTCHA response is valid or not """ config = get_config() return requests.post( config['grecaptcha']['validation_url'], data={ 'secret': config['grecaptcha']['private_key'], 'response': recaptcha_response, 'remoteip': get_client_ip(request) }, verify=True ).json().get("success", False) def context_processor(request): """ Django context processor used to inject variables in all swh-web templates. """ return {'swh_object_icons': swh_object_icons} diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py index cf9f047c..44a83f05 100644 --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -1,352 +1,352 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.test import APITestCase from unittest.mock import patch from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.tests.testcase import WebTestCase class OriginApiTestCase(WebTestCase, APITestCase): def setUp(self): self.origin_visit1 = { 'date': 1104616800.0, 'origin': 10, 'visit': 100, 'metadata': None, 'status': 'full', } self.origin1 = { 'id': 1234, 'url': 'ftp://some/url/to/origin/0', 'type': 'ftp' } @patch('swh.web.api.views.origin.get_origin_visits') def test_api_1_lookup_origin_visits_raise_error( self, mock_get_origin_visits, ): # given mock_get_origin_visits.side_effect = ValueError( 'voluntary error to check the bad request middleware.') # when rv = self.client.get('/api/1/origin/2/visits/') # then self.assertEqual(rv.status_code, 400) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'ValueError', 'reason': 'voluntary error to check the bad request middleware.'}) - @patch('swh.web.common.utils.service') + @patch('swh.web.api.views.origin.get_origin_visits') def test_api_1_lookup_origin_visits_raise_swh_storage_error_db( - self, mock_service): + self, mock_get_origin_visits): # given - mock_service.lookup_origin_visits.side_effect = StorageDBError( + mock_get_origin_visits.side_effect = StorageDBError( 'Storage exploded! Will be back online shortly!') # when rv = self.client.get('/api/1/origin/2/visits/') # then self.assertEqual(rv.status_code, 503) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'StorageDBError', 'reason': 'An unexpected error occurred in the backend: ' 'Storage exploded! Will be back online shortly!'}) - @patch('swh.web.common.utils.service') + @patch('swh.web.api.views.origin.get_origin_visits') def test_api_1_lookup_origin_visits_raise_swh_storage_error_api( - self, mock_service): + self, mock_get_origin_visits): # given - mock_service.lookup_origin_visits.side_effect = StorageAPIError( + mock_get_origin_visits.side_effect = StorageAPIError( 'Storage API dropped dead! Will resurrect from its ashes asap!' ) # when rv = self.client.get('/api/1/origin/2/visits/') # then self.assertEqual(rv.status_code, 503) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'StorageAPIError', 'reason': 'An unexpected error occurred in the api backend: ' 'Storage API dropped dead! Will resurrect from its ashes asap!' 
}) @patch('swh.web.api.views.origin.get_origin_visits') def test_api_1_lookup_origin_visits(self, mock_get_origin_visits): # given stub_visits = [ { 'date': 1293919200.0, 'origin': 2, 'snapshot': '1234', 'visit': 1 }, { 'date': 1293919200.0, 'origin': 2, 'snapshot': '1234', 'visit': 2 }, { 'date': 1420149600.0, 'origin': 2, 'snapshot': '5678', 'visit': 3 }, { 'date': 1420149600.0, 'origin': 2, 'snapshot': '5678', 'visit': 4 } ] mock_get_origin_visits.return_value = stub_visits # when rv = self.client.get('/api/1/origin/2/visits/?per_page=2&last_visit=3') self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, [ { 'date': 1293919200.0, 'origin': 2, 'snapshot': '1234', 'visit': 2, 'origin_visit_url': '/api/1/origin/2/visit/2/', 'snapshot_url': '/api/1/snapshot/1234/' }, { 'date': 1293919200.0, 'origin': 2, 'snapshot': '1234', 'visit': 1, 'origin_visit_url': '/api/1/origin/2/visit/1/', 'snapshot_url': '/api/1/snapshot/1234/' }, ]) @patch('swh.web.api.views.origin.service') def test_api_1_lookup_origin_visit(self, mock_service): # given origin_visit = self.origin_visit1.copy() origin_visit.update({ 'snapshot': '57478754' }) mock_service.lookup_origin_visit.return_value = origin_visit expected_origin_visit = self.origin_visit1.copy() expected_origin_visit.update({ 'origin_url': '/api/1/origin/10/', 'snapshot': '57478754', 'snapshot_url': '/api/1/snapshot/57478754/' }) # when rv = self.client.get('/api/1/origin/10/visit/100/') self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_origin_visit) mock_service.lookup_origin_visit.assert_called_once_with('10', '100') @patch('swh.web.api.views.origin.service') def test_api_1_lookup_origin_visit_not_found(self, mock_service): # given mock_service.lookup_origin_visit.return_value = None # when rv = self.client.get('/api/1/origin/1/visit/1000/') self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'No visit 1000 for origin 1 found' }) mock_service.lookup_origin_visit.assert_called_once_with('1', '1000') @patch('swh.web.api.views.origin.service') def test_api_origin_by_id(self, mock_service): # given mock_service.lookup_origin.return_value = self.origin1 expected_origin = self.origin1.copy() expected_origin.update({ 'origin_visits_url': '/api/1/origin/1234/visits/' }) # when rv = self.client.get('/api/1/origin/1234/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_origin) mock_service.lookup_origin.assert_called_with({'id': '1234'}) @patch('swh.web.api.views.origin.service') def test_api_origin_by_type_url(self, mock_service): # given stub_origin = self.origin1.copy() stub_origin.update({ 'id': 987 }) mock_service.lookup_origin.return_value = stub_origin expected_origin = stub_origin.copy() expected_origin.update({ 'origin_visits_url': '/api/1/origin/987/visits/' }) # when rv = self.client.get('/api/1/origin/ftp/url' '/ftp://some/url/to/origin/0/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_origin) mock_service.lookup_origin.assert_called_with( {'url': 'ftp://some/url/to/origin/0', 'type': 'ftp'}) @patch('swh.web.api.views.origin.service') def test_api_origin_not_found(self, mock_service): # given 
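
# Editor's note: the patch targets in these tests moved from
# 'swh.web.common.utils.service' to 'swh.web.api.views.origin.<name>' because
# mock.patch must name a symbol where it is *looked up*, not where it is
# defined: once the view module imports get_origin_visits directly, that
# binding is the one to replace. A minimal illustration with local stand-ins:
from unittest import mock

def fetch_visits():          # plays the role of the imported helper
    return ['real visit']

def view():                  # plays the role of the API view
    return fetch_visits()

with mock.patch(__name__ + '.fetch_visits', return_value=['stub visit']):
    assert view() == ['stub visit']
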
mock_service.lookup_origin.return_value = None # when rv = self.client.get('/api/1/origin/4321/') # then self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Origin with id 4321 not found.' }) mock_service.lookup_origin.assert_called_with({'id': '4321'}) @patch('swh.web.common.service.idx_storage') def test_api_origin_metadata_search(self, mock_idx_storage): # given mock_idx_storage.origin_intrinsic_metadata_search_fulltext \ .return_value = [{ 'from_revision': b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8', 'metadata': {'author': 'Jane Doe'}, 'origin_id': 54974445, 'tool': { 'configuration': { 'context': ['NpmMapping', 'CodemetaMapping'], 'type': 'local' }, 'id': 3, 'name': 'swh-metadata-detector', 'version': '0.0.1' } }] # when rv = self.client.get( '/api/1/origin/metadata-search/?fulltext=Jane%20Doe') # then self.assertEqual(rv.status_code, 200, rv.content) self.assertEqual(rv['Content-Type'], 'application/json') expected_data = [{ 'origin_id': 54974445, 'metadata': {'author': 'Jane Doe'}, 'from_revision': '7026b7c1a2af56521e951c01ed20f255fa054238', 'tool': { 'configuration': { 'context': ['NpmMapping', 'CodemetaMapping'], 'type': 'local' }, 'id': 3, 'name': 'swh-metadata-detector', 'version': '0.0.1', } }] self.assertEqual(rv.data, expected_data) mock_idx_storage.origin_intrinsic_metadata_search_fulltext \ .assert_called_with(conjunction=['Jane Doe'], limit=70) @patch('swh.web.common.service.idx_storage') def test_api_origin_metadata_search_limit(self, mock_idx_storage): # given mock_idx_storage.origin_intrinsic_metadata_search_fulltext \ .return_value = [{ 'from_revision': b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8', 'metadata': {'author': 'Jane Doe'}, 'origin_id': 54974445, 'tool': { 'configuration': { 'context': ['NpmMapping', 'CodemetaMapping'], 'type': 'local' }, 'id': 3, 'name': 'swh-metadata-detector', 'version': '0.0.1' } }] # when rv = self.client.get( '/api/1/origin/metadata-search/?fulltext=Jane%20Doe') # then self.assertEqual(rv.status_code, 200, rv.content) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(len(rv.data), 1) mock_idx_storage.origin_intrinsic_metadata_search_fulltext \ .assert_called_with(conjunction=['Jane Doe'], limit=70) # when rv = self.client.get( '/api/1/origin/metadata-search/?fulltext=Jane%20Doe&limit=10') # then self.assertEqual(rv.status_code, 200, rv.content) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(len(rv.data), 1) mock_idx_storage.origin_intrinsic_metadata_search_fulltext \ .assert_called_with(conjunction=['Jane Doe'], limit=10) # when rv = self.client.get( '/api/1/origin/metadata-search/?fulltext=Jane%20Doe&limit=987') # then self.assertEqual(rv.status_code, 200, rv.content) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(len(rv.data), 1) mock_idx_storage.origin_intrinsic_metadata_search_fulltext \ .assert_called_with(conjunction=['Jane Doe'], limit=100) @patch('swh.web.common.service.idx_storage') def test_api_origin_metadata_search_invalid(self, mock_idx_storage): rv = self.client.get('/api/1/origin/metadata-search/') # then self.assertEqual(rv.status_code, 400, rv.content) mock_idx_storage.assert_not_called() diff --git a/swh/web/tests/api/views/test_revision.py b/swh/web/tests/api/views/test_revision.py index ae51e1d8..ed583251 100644 --- a/swh/web/tests/api/views/test_revision.py +++ 
b/swh/web/tests/api/views/test_revision.py @@ -1,892 +1,883 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.test import APITestCase from unittest.mock import patch from swh.web.common.exc import NotFoundExc from swh.web.api.views.revision import ( _revision_directory_by ) from swh.web.tests.testcase import WebTestCase -class ReleaseApiTestCase(WebTestCase, APITestCase): +class RevisionApiTestCase(WebTestCase, APITestCase): @patch('swh.web.api.views.revision.service') def test_api_revision(self, mock_service): # given stub_revision = { 'id': '18d8be353ed3480476f032475e7c233eff7371d5', 'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'author_name': 'Software Heritage', 'author_email': 'robot@softwareheritage.org', 'committer_name': 'Software Heritage', 'committer_email': 'robot@softwareheritage.org', 'message': 'synthetic revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': ['8734ef7e7c357ce2af928115c6c6a42b7e2a44e7'], 'type': 'tar', 'synthetic': True, 'metadata': { 'original_artifact': [{ 'archive_type': 'tar', 'name': 'webbase-5.7.0.tar.gz', 'sha1': '147f73f369733d088b7a6fa9c4e0273dcd3c7ccd', 'sha1_git': '6a15ea8b881069adedf11feceec35588f2cfe8f1', 'sha256': '401d0df797110bea805d358b85bcc1ced29549d3d73f' '309d36484e7edf7bb912' }] }, } mock_service.lookup_revision.return_value = stub_revision expected_revision = { 'id': '18d8be353ed3480476f032475e7c233eff7371d5', 'url': '/api/1/revision/18d8be353ed3480476f032475e7c233eff7371d5/', 'history_url': '/api/1/revision/18d8be353ed3480476f032475e7c233e' 'ff7371d5/log/', 'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'directory_url': '/api/1/directory/7834ef7e7c357ce2af928115c6c6' 'a42b7e2a44e6/', 'author_name': 'Software Heritage', 'author_email': 'robot@softwareheritage.org', 'committer_name': 'Software Heritage', 'committer_email': 'robot@softwareheritage.org', 'message': 'synthetic revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': [{ 'id': '8734ef7e7c357ce2af928115c6c6a42b7e2a44e7', 'url': '/api/1/revision/8734ef7e7c357ce2af928115c6c6a42b7e2a44e7/' # noqa }], 'type': 'tar', 'synthetic': True, 'metadata': { 'original_artifact': [{ 'archive_type': 'tar', 'name': 'webbase-5.7.0.tar.gz', 'sha1': '147f73f369733d088b7a6fa9c4e0273dcd3c7ccd', 'sha1_git': '6a15ea8b881069adedf11feceec35588f2cfe8f1', 'sha256': '401d0df797110bea805d358b85bcc1ced29549d3d73f' '309d36484e7edf7bb912' }] }, } # when rv = self.client.get('/api/1/revision/' '18d8be353ed3480476f032475e7c233eff7371d5/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(expected_revision, rv.data) mock_service.lookup_revision.assert_called_once_with( '18d8be353ed3480476f032475e7c233eff7371d5') @patch('swh.web.api.views.revision.service') def test_api_revision_not_found(self, mock_service): # given mock_service.lookup_revision.return_value = None # when rv = self.client.get('/api/1/revision/12345/') # then self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Revision with sha1_git 12345 not found.'}) @patch('swh.web.api.views.revision.service') def test_api_revision_raw_ok(self, mock_service): # given stub_revision = {'message': 
'synthetic revision message'} mock_service.lookup_revision_message.return_value = stub_revision # when rv = self.client.get('/api/1/revision/18d8be353ed3480476f032475e7c2' '33eff7371d5/raw/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/octet-stream') self.assertEqual(rv.content, b'synthetic revision message') mock_service.lookup_revision_message.assert_called_once_with( '18d8be353ed3480476f032475e7c233eff7371d5') @patch('swh.web.api.views.revision.service') def test_api_revision_raw_ok_no_msg(self, mock_service): # given mock_service.lookup_revision_message.side_effect = NotFoundExc( 'No message for revision') # when rv = self.client.get('/api/1/revision/' '18d8be353ed3480476f032475e7c233eff7371d5/raw/') # then self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'No message for revision'}) self.assertEqual mock_service.lookup_revision_message.assert_called_once_with( '18d8be353ed3480476f032475e7c233eff7371d5') @patch('swh.web.api.views.revision.service') def test_api_revision_raw_ko_no_rev(self, mock_service): # given mock_service.lookup_revision_message.side_effect = NotFoundExc( 'No revision found') # when rv = self.client.get('/api/1/revision/' '18d8be353ed3480476f032475e7c233eff7371d5/raw/') # then self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'No revision found'}) mock_service.lookup_revision_message.assert_called_once_with( '18d8be353ed3480476f032475e7c233eff7371d5') @patch('swh.web.api.views.revision.service') def test_api_revision_with_origin_not_found(self, mock_service): mock_service.lookup_revision_by.return_value = None rv = self.client.get('/api/1/revision/origin/123/') # then self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertIn('Revision with (origin_id: 123', rv.data['reason']) self.assertIn('not found', rv.data['reason']) self.assertEqual('NotFoundExc', rv.data['exception']) mock_service.lookup_revision_by.assert_called_once_with( '123', - 'refs/heads/master', + 'HEAD', None) @patch('swh.web.api.views.revision.service') def test_api_revision_with_origin(self, mock_service): mock_revision = { 'id': '32', 'directory': '21', 'message': 'message 1', 'type': 'deb', } expected_revision = { 'id': '32', 'url': '/api/1/revision/32/', 'history_url': '/api/1/revision/32/log/', 'directory': '21', 'directory_url': '/api/1/directory/21/', 'message': 'message 1', 'type': 'deb', } mock_service.lookup_revision_by.return_value = mock_revision rv = self.client.get('/api/1/revision/origin/1/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) mock_service.lookup_revision_by.assert_called_once_with( '1', - 'refs/heads/master', + 'HEAD', None) @patch('swh.web.api.views.revision.service') def test_api_revision_with_origin_and_branch_name(self, mock_service): mock_revision = { 'id': '12', 'directory': '23', 'message': 'message 2', 'type': 'tar', } mock_service.lookup_revision_by.return_value = mock_revision expected_revision = { 'id': '12', 'url': '/api/1/revision/12/', 'history_url': '/api/1/revision/12/log/', 'directory': '23', 'directory_url': '/api/1/directory/23/', 'message': 'message 2', 'type': 'tar', } rv = self.client.get('/api/1/revision/origin/1' 
'/branch/refs/origin/dev/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) mock_service.lookup_revision_by.assert_called_once_with( '1', 'refs/origin/dev', None) - @patch('swh.web.api.views.revision.parse_timestamp') @patch('swh.web.api.views.revision.service') @patch('swh.web.api.views.revision.utils') def test_api_revision_with_origin_and_branch_name_and_timestamp(self, mock_utils, - mock_service, - mock_parse_timestamp): # noqa + mock_service): # noqa mock_revision = { 'id': '123', 'directory': '456', 'message': 'message 3', 'type': 'tar', } mock_service.lookup_revision_by.return_value = mock_revision expected_revision = { 'id': '123', 'url': '/api/1/revision/123/', 'history_url': '/api/1/revision/123/log/', 'directory': '456', 'directory_url': '/api/1/directory/456/', 'message': 'message 3', 'type': 'tar', } - mock_parse_timestamp.return_value = 'parsed-date' mock_utils.enrich_revision.return_value = expected_revision rv = self.client.get('/api/1/revision' '/origin/1' '/branch/refs/origin/dev' '/ts/1452591542/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) mock_service.lookup_revision_by.assert_called_once_with( '1', 'refs/origin/dev', - 'parsed-date') - mock_parse_timestamp.assert_called_once_with('1452591542') + '1452591542') mock_utils.enrich_revision.assert_called_once_with( mock_revision) - @patch('swh.web.api.views.revision.parse_timestamp') @patch('swh.web.api.views.revision.service') @patch('swh.web.api.views.revision.utils') def test_api_revision_with_origin_and_branch_name_and_timestamp_escapes( self, mock_utils, - mock_service, - mock_parse_timestamp): + mock_service): mock_revision = { 'id': '999', } mock_service.lookup_revision_by.return_value = mock_revision expected_revision = { 'id': '999', 'url': '/api/1/revision/999/', 'history_url': '/api/1/revision/999/log/', } - mock_parse_timestamp.return_value = 'parsed-date' mock_utils.enrich_revision.return_value = expected_revision rv = self.client.get('/api/1/revision' '/origin/1' '/branch/refs%2Forigin%2Fdev' '/ts/Today%20is%20' 'January%201,%202047%20at%208:21:00AM/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) mock_service.lookup_revision_by.assert_called_once_with( '1', 'refs/origin/dev', - 'parsed-date') - mock_parse_timestamp.assert_called_once_with( 'Today is January 1, 2047 at 8:21:00AM') mock_utils.enrich_revision.assert_called_once_with( mock_revision) @patch('swh.web.api.views.revision.service') def test_revision_directory_by_ko_raise(self, mock_service): # given mock_service.lookup_directory_through_revision.side_effect = NotFoundExc('not') # noqa # when with self.assertRaises(NotFoundExc): _revision_directory_by( {'sha1_git': 'id'}, None, '/api/1/revision/sha1/directory/') # then mock_service.lookup_directory_through_revision.assert_called_once_with( {'sha1_git': 'id'}, None, limit=100, with_data=False) @patch('swh.web.api.views.revision.service') def test_revision_directory_by_type_dir(self, mock_service): # given mock_service.lookup_directory_through_revision.return_value = ( 'rev-id', { 'type': 'dir', 'revision': 'rev-id', 'path': 'some/path', 'content': [] }) # when actual_dir_content = _revision_directory_by( {'sha1_git': 'blah-id'}, 'some/path', '/api/1/revision/sha1/directory/') # then 
self.assertEqual(actual_dir_content, { 'type': 'dir', 'revision': 'rev-id', 'path': 'some/path', 'content': [] }) mock_service.lookup_directory_through_revision.assert_called_once_with( {'sha1_git': 'blah-id'}, 'some/path', limit=100, with_data=False) @patch('swh.web.api.views.revision.service') def test_revision_directory_by_type_file(self, mock_service): # given mock_service.lookup_directory_through_revision.return_value = ( 'rev-id', { 'type': 'file', 'revision': 'rev-id', 'path': 'some/path', 'content': {'blah': 'blah'} }) # when actual_dir_content = _revision_directory_by( {'sha1_git': 'sha1'}, 'some/path', '/api/1/revision/origin/2/directory/', limit=1000, with_data=True) # then self.assertEqual(actual_dir_content, { 'type': 'file', 'revision': 'rev-id', 'path': 'some/path', 'content': {'blah': 'blah'} }) mock_service.lookup_directory_through_revision.assert_called_once_with( {'sha1_git': 'sha1'}, 'some/path', limit=1000, with_data=True) @patch('swh.web.api.views.revision.parse_timestamp') @patch('swh.web.api.views.revision._revision_directory_by') @patch('swh.web.api.views.revision.utils') def test_api_directory_through_revision_origin_ko_not_found(self, mock_utils, mock_rev_dir, mock_parse_timestamp): # noqa mock_rev_dir.side_effect = NotFoundExc('not found') mock_parse_timestamp.return_value = '2012-10-20 00:00:00' rv = self.client.get('/api/1/revision' '/origin/10' '/branch/refs/remote/origin/dev' '/ts/2012-10-20' '/directory/') # then self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'not found'}) mock_rev_dir.assert_called_once_with( {'origin_id': '10', 'branch_name': 'refs/remote/origin/dev', 'ts': '2012-10-20 00:00:00'}, None, '/api/1/revision' '/origin/10' '/branch/refs/remote/origin/dev' '/ts/2012-10-20' '/directory/', with_data=False) @patch('swh.web.api.views.revision._revision_directory_by') def test_api_directory_through_revision_origin(self, mock_revision_dir): expected_res = [{ 'id': '123' }] mock_revision_dir.return_value = expected_res rv = self.client.get('/api/1/revision/origin/3/directory/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_res) mock_revision_dir.assert_called_once_with({ 'origin_id': '3', 'branch_name': 'refs/heads/master', 'ts': None}, None, '/api/1/revision/origin/3/directory/', with_data=False) @patch('swh.web.api.views.revision.service') def test_api_revision_log(self, mock_service): # given stub_revisions = [{ 'id': '18d8be353ed3480476f032475e7c233eff7371d5', 'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'author_name': 'Software Heritage', 'author_email': 'robot@softwareheritage.org', 'committer_name': 'Software Heritage', 'committer_email': 'robot@softwareheritage.org', 'message': 'synthetic revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': ['7834ef7e7c357ce2af928115c6c6a42b7e2a4345'], 'type': 'tar', 'synthetic': True, }] mock_service.lookup_revision_log.return_value = stub_revisions expected_revisions = [{ 'id': '18d8be353ed3480476f032475e7c233eff7371d5', 'url': '/api/1/revision/18d8be353ed3480476f032475e7c233eff7371d5/', 'history_url': '/api/1/revision/18d8be353ed3480476f032475e7c233ef' 'f7371d5/log/', 'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'directory_url': '/api/1/directory/7834ef7e7c357ce2af928115c6c6a' '42b7e2a44e6/', 'author_name': 'Software Heritage', 'author_email': 
'robot@softwareheritage.org', 'committer_name': 'Software Heritage', 'committer_email': 'robot@softwareheritage.org', 'message': 'synthetic revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': [{ 'id': '7834ef7e7c357ce2af928115c6c6a42b7e2a4345', 'url': '/api/1/revision/7834ef7e7c357ce2af928115c6c6a42b7e2a4345/', # noqa }], 'type': 'tar', 'synthetic': True, }] # when rv = self.client.get('/api/1/revision/8834ef7e7c357ce2af928115c6c6a42' 'b7e2a44e6/log/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revisions) self.assertFalse(rv.has_header('Link')) mock_service.lookup_revision_log.assert_called_once_with( '8834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 11) @patch('swh.web.api.views.revision.service') def test_api_revision_log_with_next(self, mock_service): # given stub_revisions = [] for i in range(27): stub_revisions.append({'id': str(i)}) mock_service.lookup_revision_log.return_value = stub_revisions[:26] expected_revisions = [x for x in stub_revisions if int(x['id']) < 25] for e in expected_revisions: e['url'] = '/api/1/revision/%s/' % e['id'] e['history_url'] = '/api/1/revision/%s/log/' % e['id'] # when rv = self.client.get('/api/1/revision/8834ef7e7c357ce2af928115c6c6a42' 'b7e2a44e6/log/?per_page=25') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revisions) self.assertEqual(rv['Link'], '; rel="next"') mock_service.lookup_revision_log.assert_called_once_with( '8834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 26) @patch('swh.web.api.views.revision.service') def test_api_revision_log_not_found(self, mock_service): # given mock_service.lookup_revision_log.return_value = None # when rv = self.client.get('/api/1/revision/8834ef7e7c357ce2af928115c6c6' 'a42b7e2a44e6/log/') # then self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Revision with sha1_git' ' 8834ef7e7c357ce2af928115c6c6a42b7e2a44e6 not found.'}) self.assertFalse(rv.has_header('Link')) mock_service.lookup_revision_log.assert_called_once_with( '8834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 11) @patch('swh.web.api.views.revision.service') def test_api_revision_log_context(self, mock_service): # given stub_revisions = [{ 'id': '18d8be353ed3480476f032475e7c233eff7371d5', 'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'author_name': 'Software Heritage', 'author_email': 'robot@softwareheritage.org', 'committer_name': 'Software Heritage', 'committer_email': 'robot@softwareheritage.org', 'message': 'synthetic revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': ['7834ef7e7c357ce2af928115c6c6a42b7e2a4345'], 'type': 'tar', 'synthetic': True, }] mock_service.lookup_revision_log.return_value = stub_revisions mock_service.lookup_revision_multiple.return_value = [{ 'id': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'directory': '18d8be353ed3480476f032475e7c233eff7371d5', 'author_name': 'Name Surname', 'author_email': 'name@surname.com', 'committer_name': 'Name Surname', 'committer_email': 'name@surname.com', 'message': 'amazing revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': ['adc83b19e793491b1c6ea0fd8b46cd9f32e592fc'], 'type': 'tar', 'synthetic': True, }] expected_revisions = [ { 'url': '/api/1/revision/' '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6/', 'history_url': '/api/1/revision/' 
'7834ef7e7c357ce2af928115c6c6a42b7e2a44e6/log/', 'id': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'directory': '18d8be353ed3480476f032475e7c233eff7371d5', 'directory_url': '/api/1/directory/' '18d8be353ed3480476f032475e7c233eff7371d5/', 'author_name': 'Name Surname', 'author_email': 'name@surname.com', 'committer_name': 'Name Surname', 'committer_email': 'name@surname.com', 'message': 'amazing revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': [{ 'id': 'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc', 'url': '/api/1/revision/adc83b19e793491b1c6ea0fd8b46cd9f32e592fc/', # noqa }], 'type': 'tar', 'synthetic': True, }, { 'url': '/api/1/revision/' '18d8be353ed3480476f032475e7c233eff7371d5/', 'history_url': '/api/1/revision/' '18d8be353ed3480476f032475e7c233eff7371d5/log/', 'id': '18d8be353ed3480476f032475e7c233eff7371d5', 'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'directory_url': '/api/1/directory/' '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6/', 'author_name': 'Software Heritage', 'author_email': 'robot@softwareheritage.org', 'committer_name': 'Software Heritage', 'committer_email': 'robot@softwareheritage.org', 'message': 'synthetic revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': [{ 'id': '7834ef7e7c357ce2af928115c6c6a42b7e2a4345', 'url': '/api/1/revision/7834ef7e7c357ce2af928115c6c6a42b7e2a4345/', # noqa }], 'type': 'tar', 'synthetic': True, }] # when rv = self.client.get('/api/1/revision/18d8be353ed3480476f0' '32475e7c233eff7371d5/prev/21145781e2' '6ad1f978e/log/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(expected_revisions, rv.data) self.assertFalse(rv.has_header('Link')) mock_service.lookup_revision_log.assert_called_once_with( '18d8be353ed3480476f032475e7c233eff7371d5', 11) mock_service.lookup_revision_multiple.assert_called_once_with( ['21145781e26ad1f978e']) @patch('swh.web.api.views.revision.service') def test_api_revision_log_by(self, mock_service): # given stub_revisions = [{ 'id': '18d8be353ed3480476f032475e7c233eff7371d5', 'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'author_name': 'Software Heritage', 'author_email': 'robot@softwareheritage.org', 'committer_name': 'Software Heritage', 'committer_email': 'robot@softwareheritage.org', 'message': 'synthetic revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': ['7834ef7e7c357ce2af928115c6c6a42b7e2a4345'], 'type': 'tar', 'synthetic': True, }] mock_service.lookup_revision_log_by.return_value = stub_revisions expected_revisions = [{ 'id': '18d8be353ed3480476f032475e7c233eff7371d5', 'url': '/api/1/revision/18d8be353ed3480476f032475e7c233eff7371d5/', 'history_url': '/api/1/revision/18d8be353ed3480476f032475e7c233ef' 'f7371d5/log/', 'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6', 'directory_url': '/api/1/directory/7834ef7e7c357ce2af928115c6c6a' '42b7e2a44e6/', 'author_name': 'Software Heritage', 'author_email': 'robot@softwareheritage.org', 'committer_name': 'Software Heritage', 'committer_email': 'robot@softwareheritage.org', 'message': 'synthetic revision message', 'date_offset': 0, 'committer_date_offset': 0, 'parents': [{ 'id': '7834ef7e7c357ce2af928115c6c6a42b7e2a4345', 'url': '/api/1/revision/7834ef7e7c357ce2af928115c6c6a42b7e2a4345/' # noqa }], 'type': 'tar', 'synthetic': True, }] # when rv = self.client.get('/api/1/revision/origin/1/log/') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') 
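
# Editor's note: the log tests above expect the view to request per_page + 1
# revisions from the service (11 for the default page of 10, 26 for
# per_page=25): the extra element only proves a next page exists and is
# trimmed before responding. A generic sketch of that fetch-one-more pattern:
def paginate(fetch, per_page):
    results = fetch(per_page + 1)
    return results[:per_page], len(results) > per_page

page, has_next = paginate(lambda n: list(range(26))[:n], 25)
assert len(page) == 25 and has_next is True
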
self.assertEqual(rv.data, expected_revisions) self.assertFalse(rv.has_header('Link')) mock_service.lookup_revision_log_by.assert_called_once_with( - '1', 'refs/heads/master', None, 11) + '1', 'HEAD', None, 11) @patch('swh.web.api.views.revision.service') def test_api_revision_log_by_with_next(self, mock_service): # given stub_revisions = [] for i in range(27): stub_revisions.append({'id': str(i)}) mock_service.lookup_revision_log_by.return_value = stub_revisions[:26] expected_revisions = [x for x in stub_revisions if int(x['id']) < 25] for e in expected_revisions: e['url'] = '/api/1/revision/%s/' % e['id'] e['history_url'] = '/api/1/revision/%s/log/' % e['id'] # when rv = self.client.get('/api/1/revision/origin/1/log/?per_page=25') # then self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertIsNotNone(rv['Link']) self.assertEqual(rv.data, expected_revisions) mock_service.lookup_revision_log_by.assert_called_once_with( - '1', 'refs/heads/master', None, 26) + '1', 'HEAD', None, 26) @patch('swh.web.api.views.revision.service') def test_api_revision_log_by_norev(self, mock_service): # given mock_service.lookup_revision_log_by.side_effect = NotFoundExc( 'No revision') # when rv = self.client.get('/api/1/revision/origin/1/log/') # then self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertFalse(rv.has_header('Link')) self.assertEqual(rv.data, {'exception': 'NotFoundExc', 'reason': 'No revision'}) mock_service.lookup_revision_log_by.assert_called_once_with( - '1', 'refs/heads/master', None, 11) + '1', 'HEAD', None, 11) @patch('swh.web.api.views.revision.service') def test_api_revision_history(self, mock_service): # for readability purposes, we use: # - sha1 as 3 letters (url are way too long otherwise to respect pep8) # - only keys with modification steps (all other keys are kept as is) # given stub_revision = { 'id': '883', 'children': ['777', '999'], 'parents': [], 'directory': '272' } mock_service.lookup_revision.return_value = stub_revision # then rv = self.client.get('/api/1/revision/883/prev/999/') self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'id': '883', 'url': '/api/1/revision/883/', 'history_url': '/api/1/revision/883/log/', 'history_context_url': '/api/1/revision/883/prev/999/log/', 'children': ['777', '999'], 'children_urls': ['/api/1/revision/777/', '/api/1/revision/999/'], 'parents': [], 'directory': '272', 'directory_url': '/api/1/directory/272/' }) mock_service.lookup_revision.assert_called_once_with('883') @patch('swh.web.api.views.revision._revision_directory_by') def test_api_revision_directory_ko_not_found(self, mock_rev_dir): # given mock_rev_dir.side_effect = NotFoundExc('Not found') # then rv = self.client.get('/api/1/revision/999/directory/some/path/to/dir/') self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Not found'}) mock_rev_dir.assert_called_once_with( {'sha1_git': '999'}, 'some/path/to/dir', '/api/1/revision/999/directory/some/path/to/dir/', with_data=False) @patch('swh.web.api.views.revision._revision_directory_by') def test_api_revision_directory_ok_returns_dir_entries(self, mock_rev_dir): stub_dir = { 'type': 'dir', 'revision': '999', 'content': [ { 'sha1_git': '789', 'type': 'file', 'target': '101', 'target_url': '/api/1/content/sha1_git:101/', 'name': 'somefile', 
'file_url': '/api/1/revision/999/directory/some/path/' 'somefile/' }, { 'sha1_git': '123', 'type': 'dir', 'target': '456', 'target_url': '/api/1/directory/456/', 'name': 'to-subdir', 'dir_url': '/api/1/revision/999/directory/some/path/' 'to-subdir/', }] } # given mock_rev_dir.return_value = stub_dir # then rv = self.client.get('/api/1/revision/999/directory/some/path/') self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, stub_dir) mock_rev_dir.assert_called_once_with( {'sha1_git': '999'}, 'some/path', '/api/1/revision/999/directory/some/path/', with_data=False) @patch('swh.web.api.views.revision._revision_directory_by') def test_api_revision_directory_ok_returns_content(self, mock_rev_dir): stub_content = { 'type': 'file', 'revision': '999', 'content': { 'sha1_git': '789', 'sha1': '101', 'data_url': '/api/1/content/101/raw/', } } # given mock_rev_dir.return_value = stub_content # then url = '/api/1/revision/666/directory/some/other/path/' rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, stub_content) mock_rev_dir.assert_called_once_with( {'sha1_git': '666'}, 'some/other/path', url, with_data=False) diff --git a/swh/web/tests/browse/test_utils.py b/swh/web/tests/browse/test_utils.py index bd87b292..af1da9cd 100644 --- a/swh/web/tests/browse/test_utils.py +++ b/swh/web/tests/browse/test_utils.py @@ -1,244 +1,169 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information # flake8: noqa from unittest.mock import patch from swh.web.browse import utils from swh.web.common.exc import NotFoundExc from swh.web.common.utils import reverse from swh.web.tests.testcase import WebTestCase from .views.data.revision_test_data import revision_history_log_test class SwhBrowseUtilsTestCase(WebTestCase): def test_get_mimetype_and_encoding_for_content(self): text = b'Hello world!' 
self.assertEqual(utils.get_mimetype_and_encoding_for_content(text), ('text/plain', 'us-ascii')) - @patch('swh.web.browse.utils.get_origin_visits') - def test_get_origin_visit(self, mock_origin_visits): - origin_info = { - 'id': 2, - 'type': 'git', - 'url': 'https://github.com/foo/bar', - } - visits = \ - [{'status': 'full', - 'date': '2015-07-09T21:09:24+00:00', - 'visit': 1, - 'origin': origin_info['id'] - }, - {'status': 'full', - 'date': '2016-02-23T18:05:23.312045+00:00', - 'visit': 2, - 'origin': origin_info['id'] - }, - {'status': 'full', - 'date': '2016-03-28T01:35:06.554111+00:00', - 'visit': 3, - 'origin': origin_info['id'] - }, - {'status': 'full', - 'date': '2016-06-18T01:22:24.808485+00:00', - 'visit': 4, - 'origin': origin_info['id'] - }, - {'status': 'full', - 'date': '2016-08-14T12:10:00.536702+00:00', - 'visit': 5, - 'origin': origin_info['id'] - }] - mock_origin_visits.return_value = visits - - visit_id = 12 - with self.assertRaises(NotFoundExc) as cm: - visit = utils.get_origin_visit(origin_info, - visit_id=visit_id) - exception_text = cm.exception.args[0] - self.assertIn('Visit with id %s' % visit_id, exception_text) - self.assertIn('type %s' % origin_info['type'], exception_text) - self.assertIn('url %s' % origin_info['url'], exception_text) - - visit = utils.get_origin_visit(origin_info, visit_id=2) - self.assertEqual(visit, visits[1]) - - visit = utils.get_origin_visit( - origin_info, visit_ts='2016-02-23T18:05:23.312045+00:00') - self.assertEqual(visit, visits[1]) - - visit = utils.get_origin_visit( - origin_info, visit_ts='2016-02-20') - self.assertEqual(visit, visits[1]) - - visit = utils.get_origin_visit( - origin_info, visit_ts='2016-06-18T01:22') - self.assertEqual(visit, visits[3]) - - visit = utils.get_origin_visit( - origin_info, visit_ts='2016-06-18 01:22') - self.assertEqual(visit, visits[3]) - - visit = utils.get_origin_visit( - origin_info, visit_ts=1466208000) - self.assertEqual(visit, visits[3]) - - visit = utils.get_origin_visit( - origin_info, visit_ts='2014-01-01') - self.assertEqual(visit, visits[0]) - - visit = utils.get_origin_visit( - origin_info, visit_ts='2018-01-01') - self.assertEqual(visit, visits[-1]) - @patch('swh.web.browse.utils.service') @patch('swh.web.browse.utils.get_origin_visit') def test_get_origin_visit_snapshot(self, mock_get_origin_visit, mock_service): mock_get_origin_visit.return_value = \ {'status': 'full', 'date': '2015-08-04T22:26:14.804009+00:00', 'visit': 1, 'origin': 1, 'snapshot': '584b2fe3ce6218a96892e73bd76c2966bbc2a797'} mock_service.lookup_snapshot.return_value = \ {'branches': { 'HEAD': { 'target': '9fbd21adbac36be869514e82e2e98505dc47219c', 'target_type': 'revision', 'target_url': '/api/1/revision/9fbd21adbac36be869514e82e2e98505dc47219c/' }, 'refs/heads/master': { 'target': '9fbd21adbac36be869514e82e2e98505dc47219c', 'target_type': 'revision', 'target_url': '/api/1/revision/9fbd21adbac36be869514e82e2e98505dc47219c/' }, 'refs/tags/0.10.0': { 'target': '7045404f3d1c54e6473c71bbb716529fbad4be24', 'target_type': 'release', 'target_url': '/api/1/release/7045404f3d1c54e6473c71bbb716529fbad4be24/' }, 'refs/tags/0.10.1': { 'target': 'c893f4549c367e68288b0eb74595050410aa0de7', 'target_type': 'release', 'target_url': '/api/1/release/c893f4549c367e68288b0eb74595050410aa0de7/' } }, 'id': '584b2fe3ce6218a96892e73bd76c2966bbc2a797'} mock_service.lookup_release_multiple.return_value = \ [{'name': '0.10.0', 'message': '0.10: The "Oh fuck it\'s PyCon" release\n', 'id': '7045404f3d1c54e6473c71bbb716529fbad4be24', 'date': 
'2014-04-10T23:01:28-04:00',
              'target_type': 'revision',
              'target': '6072557b6c10cd9a21145781e26ad1f978ed14b9'},
             {'name': '0.10.1', 'message': 'Tagging 0.10.1\n',
              'id': 'c893f4549c367e68288b0eb74595050410aa0de7',
              'date': '2014-10-10T09:45:52-04:00',
              'target_type': 'revision',
              'target': 'ecc003b43433e5b46511157598e4857a761007bf'}]

        mock_service.lookup_revision_multiple.return_value = \
            [{'date': '2015-08-04T13:16:54+03:00',
              'directory': '828da2b80e41aa958b2c98526f4a1d2cc7d298b7',
              'id': '9fbd21adbac36be869514e82e2e98505dc47219c',
              'message': 'Merge pull request #678 from algernon'},
             {'date': '2014-04-10T23:01:11-04:00',
              'directory': '2df4cd84ecc65b50b1d5318d3727e02a39b8a4cf',
              'id': '6072557b6c10cd9a21145781e26ad1f978ed14b9',
              'message': '0.10: The "Oh fuck it\'s PyCon" release\n'},
             {'date': '2014-10-10T09:45:23-04:00',
              'directory': '28ba64f97ef709e54838ae482c2da2619a74a0bd',
              'id': 'ecc003b43433e5b46511157598e4857a761007bf',
              'message': '0.10.1\n'}]

        expected_result = (
            [{'name': 'HEAD',
              'message': 'Merge pull request #678 from algernon',
              'date': '04 August 2015, 10:16 UTC',
              'revision': '9fbd21adbac36be869514e82e2e98505dc47219c',
              'directory': '828da2b80e41aa958b2c98526f4a1d2cc7d298b7'},
             {'name': 'refs/heads/master',
              'message': 'Merge pull request #678 from algernon',
              'date': '04 August 2015, 10:16 UTC',
              'revision': '9fbd21adbac36be869514e82e2e98505dc47219c',
              'directory': '828da2b80e41aa958b2c98526f4a1d2cc7d298b7'}],
            [{'name': '0.10.0', 'branch_name': 'refs/tags/0.10.0',
              'id': '7045404f3d1c54e6473c71bbb716529fbad4be24',
              'message': '0.10: The "Oh fuck it\'s PyCon" release\n',
              'date': '11 April 2014, 03:01 UTC',
              'target_type': 'revision',
              'target': '6072557b6c10cd9a21145781e26ad1f978ed14b9',
              'directory': '2df4cd84ecc65b50b1d5318d3727e02a39b8a4cf'},
             {'name': '0.10.1', 'branch_name': 'refs/tags/0.10.1',
              'id': 'c893f4549c367e68288b0eb74595050410aa0de7',
              'message': 'Tagging 0.10.1\n',
              'date': '10 October 2014, 13:45 UTC',
              'target_type': 'revision',
              'target': 'ecc003b43433e5b46511157598e4857a761007bf',
              'directory': '28ba64f97ef709e54838ae482c2da2619a74a0bd'}]
        )

        origin_info = {
            'id': 1,
            'type': 'git',
            'url': 'https://github.com/hylang/hy'
        }

        origin_visit_branches = \
            utils.get_origin_visit_snapshot(origin_info, visit_id=1)

        lookup_release_calls = mock_service.lookup_release_multiple.call_args_list
        self.assertEqual(len(lookup_release_calls), 1)
        # Check that we looked up the two expected releases
        self.assertCountEqual(set(lookup_release_calls[0][0][0]), {
            '7045404f3d1c54e6473c71bbb716529fbad4be24',
            'c893f4549c367e68288b0eb74595050410aa0de7',
        })

        lookup_revision_calls = mock_service.lookup_revision_multiple.call_args_list
        self.assertEqual(len(lookup_revision_calls), 1)
        # Check that we looked up the three expected revisions
        self.assertCountEqual(set(lookup_revision_calls[0][0][0]), {
            '9fbd21adbac36be869514e82e2e98505dc47219c',
            '6072557b6c10cd9a21145781e26ad1f978ed14b9',
            'ecc003b43433e5b46511157598e4857a761007bf',
        })

        self.assertEqual(origin_visit_branches, expected_result)

    def test_gen_link(self):
        self.assertEqual(
            utils.gen_link('https://www.softwareheritage.org/', 'swh'),
            '<a href="https://www.softwareheritage.org/">swh</a>')

    def test_gen_person_link(self):
        person_id = 8221896
        person_name = 'Antoine Lambert'
        person_url = reverse('browse-person',
                             url_args={'person_id': person_id})

        self.assertEqual(utils.gen_person_link(person_id, person_name),
                         '<a href="%s">%s</a>' % (person_url, person_name))

    def test_gen_revision_link(self):
        revision_id = '28a0bc4120d38a394499382ba21d6965a67a3703'
        revision_url = reverse('browse-revision',
                               url_args={'sha1_git': revision_id})
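        # gen_revision_link is expected to render an HTML anchor targeting the
        # revision browse view; with shorten_id=True only the first 7 hex
        # digits of the sha1 are displayed as the link text.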
self.assertEqual(utils.gen_revision_link(revision_id),
                         '<a href="%s">%s</a>' % (revision_url, revision_id))
        self.assertEqual(utils.gen_revision_link(revision_id, shorten_id=True),
                         '<a href="%s">%s</a>' % (revision_url, revision_id[:7]))
diff --git a/swh/web/tests/browse/views/test_origin.py b/swh/web/tests/browse/views/test_origin.py
index 42a9a1f6..987c4fc2 100644
--- a/swh/web/tests/browse/views/test_origin.py
+++ b/swh/web/tests/browse/views/test_origin.py
@@ -1,914 +1,914 @@
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

# flake8: noqa

from unittest.mock import patch

from django.utils.html import escape

from swh.web.common.exc import NotFoundExc
from swh.web.common.utils import (
    reverse, gen_path_info, format_utc_iso_date,
    parse_timestamp, get_swh_persistent_id
)
from swh.web.tests.testcase import WebTestCase

from .data.origin_test_data import (
    origin_info_test_data, origin_visits_test_data,
    stub_content_origin_info, stub_content_origin_visit_id,
    stub_content_origin_visit_unix_ts, stub_content_origin_visit_iso_date,
    stub_content_origin_branch, stub_content_origin_visits,
    stub_content_origin_snapshot, stub_origin_info, stub_visit_id,
    stub_origin_visits, stub_origin_snapshot,
    stub_origin_root_directory_entries, stub_origin_master_branch,
    stub_origin_root_directory_sha1, stub_origin_sub_directory_path,
    stub_origin_sub_directory_entries, stub_visit_unix_ts, stub_visit_iso_date
)

from .data.content_test_data import (
    stub_content_root_dir, stub_content_text_data, stub_content_text_path
)

stub_origin_info_no_type = dict(stub_origin_info)
stub_origin_info_no_type['type'] = None

def _to_snapshot_dict(branches=None, releases=None):
    snp = {'branches': {}}
    if branches:
        for b in branches:
            snp['branches'][b['name']] = {
                'target': b['revision'],
                'target_type': 'revision'
            }
    if releases:
        for r in releases:
            snp['branches'][r['branch_name']] = {
                'target': r['id'],
                'target_type': 'release'
            }
    return snp

class SwhBrowseOriginTest(WebTestCase):

    @patch('swh.web.browse.utils.service')
    @patch('swh.web.browse.utils.get_origin_visit_snapshot')
-   @patch('swh.web.browse.utils.get_origin_visits')
+   @patch('swh.web.common.origin_visits.get_origin_visits')
    @patch('swh.web.browse.utils.get_origin_info')
    @patch('swh.web.browse.views.origin.get_origin_info')
    @patch('swh.web.browse.views.origin.get_origin_visits')
    @patch('swh.web.browse.views.origin.service')
    def test_origin_visits_browse(self, mock_service, mock_get_origin_visits,
                                  mock_get_origin_info,
                                  mock_get_origin_info_utils,
                                  mock_get_origin_visits_utils,
                                  mock_get_origin_visit_snapshot,
                                  mock_utils_service):
        mock_service.lookup_origin.return_value = origin_info_test_data
        mock_get_origin_info.return_value = origin_info_test_data
        mock_get_origin_info_utils.return_value = origin_info_test_data
        mock_get_origin_visits.return_value = origin_visits_test_data
        mock_get_origin_visits_utils.return_value = origin_visits_test_data
        mock_get_origin_visit_snapshot.return_value = stub_content_origin_snapshot
        mock_utils_service.lookup_snapshot_size.return_value = {
            'revision': len(stub_content_origin_snapshot[0]),
            'release': len(stub_content_origin_snapshot[1])
        }

        url = reverse('browse-origin-visits',
                      url_args={'origin_type': origin_info_test_data['type'],
                                'origin_url': origin_info_test_data['url']})

        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 200)
self.assertTemplateUsed('origin-visits.html') url = reverse('browse-origin-visits', url_args={'origin_url': origin_info_test_data['url']}) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('origin-visits.html') def origin_content_view_helper(self, origin_info, origin_visits, origin_branches, origin_releases, origin_branch, root_dir_sha1, content_sha1, content_sha1_git, content_path, content_data, content_language, visit_id=None, timestamp=None): url_args = {'origin_type': origin_info['type'], 'origin_url': origin_info['url'], 'path': content_path} if not visit_id: visit_id = origin_visits[-1]['visit'] query_params = {} if timestamp: url_args['timestamp'] = timestamp if visit_id: query_params['visit_id'] = visit_id url = reverse('browse-origin-content', url_args=url_args, query_params=query_params) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('content.html') self.assertContains(resp, '' % content_language) self.assertContains(resp, escape(content_data)) split_path = content_path.split('/') filename = split_path[-1] path = content_path.replace(filename, '')[:-1] path_info = gen_path_info(path) del url_args['path'] if timestamp: url_args['timestamp'] = \ format_utc_iso_date(parse_timestamp(timestamp).isoformat(), '%Y-%m-%dT%H:%M:%S') root_dir_url = reverse('browse-origin-directory', url_args=url_args, query_params=query_params) self.assertContains(resp, '
  • ', count=len(path_info)+1) self.assertContains(resp, '%s' % (root_dir_url, root_dir_sha1[:7])) for p in path_info: url_args['path'] = p['path'] dir_url = reverse('browse-origin-directory', url_args=url_args, query_params=query_params) self.assertContains(resp, '%s' % (dir_url, p['name'])) self.assertContains(resp, '
  • %s
  • ' % filename) query_string = 'sha1_git:' + content_sha1 url_raw = reverse('browse-content-raw', url_args={'query_string': query_string}, query_params={'filename': filename}) self.assertContains(resp, url_raw) del url_args['path'] origin_branches_url = \ reverse('browse-origin-branches', url_args=url_args, query_params=query_params) self.assertContains(resp, 'Branches (%s)' % (origin_branches_url, len(origin_branches))) origin_releases_url = \ reverse('browse-origin-releases', url_args=url_args, query_params=query_params) self.assertContains(resp, 'Releases (%s)' % (origin_releases_url, len(origin_releases))) self.assertContains(resp, '
  • ', count=len(origin_branches)) url_args['path'] = content_path for branch in origin_branches: query_params['branch'] = branch['name'] root_dir_branch_url = \ reverse('browse-origin-content', url_args=url_args, query_params=query_params) self.assertContains(resp, '' % root_dir_branch_url) self.assertContains(resp, '
  • ', count=len(origin_releases)) query_params['branch'] = None for release in origin_releases: query_params['release'] = release['name'] root_dir_release_url = \ reverse('browse-origin-content', url_args=url_args, query_params=query_params) self.assertContains(resp, '' % root_dir_release_url) del url_args['origin_type'] url = reverse('browse-origin-content', url_args=url_args, query_params=query_params) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('content.html') swh_cnt_id = get_swh_persistent_id('content', content_sha1_git) swh_cnt_id_url = reverse('browse-swh-id', url_args={'swh_id': swh_cnt_id}) self.assertContains(resp, swh_cnt_id) self.assertContains(resp, swh_cnt_id_url) - @patch('swh.web.browse.utils.get_origin_visits') + @patch('swh.web.common.origin_visits.get_origin_visits') @patch('swh.web.browse.utils.get_origin_visit_snapshot') @patch('swh.web.browse.views.utils.snapshot_context.service') @patch('swh.web.browse.utils.service') @patch('swh.web.browse.views.utils.snapshot_context.request_content') def test_origin_content_view(self, mock_request_content, mock_utils_service, mock_service, mock_get_origin_visit_snapshot, mock_get_origin_visits): stub_content_text_sha1 = stub_content_text_data['checksums']['sha1'] stub_content_text_sha1_git = stub_content_text_data['checksums']['sha1_git'] mock_get_origin_visits.return_value = stub_content_origin_visits mock_get_origin_visit_snapshot.return_value = stub_content_origin_snapshot mock_service.lookup_directory_with_path.return_value = \ {'target': stub_content_text_sha1} mock_request_content.return_value = stub_content_text_data mock_utils_service.lookup_origin.return_value = stub_content_origin_info mock_utils_service.lookup_snapshot_size.return_value = { 'revision': len(stub_content_origin_snapshot[0]), 'release': len(stub_content_origin_snapshot[1]) } self.origin_content_view_helper(stub_content_origin_info, stub_content_origin_visits, stub_content_origin_snapshot[0], stub_content_origin_snapshot[1], stub_content_origin_branch, stub_content_root_dir, stub_content_text_sha1, stub_content_text_sha1_git, stub_content_text_path, stub_content_text_data['raw_data'], 'cpp') self.origin_content_view_helper(stub_content_origin_info, stub_content_origin_visits, stub_content_origin_snapshot[0], stub_content_origin_snapshot[1], stub_content_origin_branch, stub_content_root_dir, stub_content_text_sha1, stub_content_text_sha1_git, stub_content_text_path, stub_content_text_data['raw_data'], 'cpp', visit_id=stub_content_origin_visit_id) self.origin_content_view_helper(stub_content_origin_info, stub_content_origin_visits, stub_content_origin_snapshot[0], stub_content_origin_snapshot[1], stub_content_origin_branch, stub_content_root_dir, stub_content_text_sha1, stub_content_text_sha1_git, stub_content_text_path, stub_content_text_data['raw_data'], 'cpp', timestamp=stub_content_origin_visit_unix_ts) self.origin_content_view_helper(stub_content_origin_info, stub_content_origin_visits, stub_content_origin_snapshot[0], stub_content_origin_snapshot[1], stub_content_origin_branch, stub_content_root_dir, stub_content_text_sha1, stub_content_text_sha1_git, stub_content_text_path, stub_content_text_data['raw_data'], 'cpp', timestamp=stub_content_origin_visit_iso_date) def origin_directory_view_helper(self, origin_info, origin_visits, origin_branches, origin_releases, origin_branch, root_directory_sha1, directory_entries, visit_id=None, timestamp=None, path=None): dirs = [e for e in directory_entries if 
e['type'] in ('dir', 'rev')] files = [e for e in directory_entries if e['type'] == 'file'] if not visit_id: visit_id = origin_visits[-1]['visit'] url_args = {'origin_url': origin_info['url']} query_params = {} if timestamp: url_args['timestamp'] = timestamp else: query_params['visit_id'] = visit_id if path: url_args['path'] = path url = reverse('browse-origin-directory', url_args=url_args, query_params=query_params) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('directory.html') self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('directory.html') self.assertContains(resp, '', count=len(dirs)) self.assertContains(resp, '', count=len(files)) if timestamp: url_args['timestamp'] = \ format_utc_iso_date(parse_timestamp(timestamp).isoformat(), '%Y-%m-%dT%H:%M:%S') for d in dirs: if d['type'] == 'rev': dir_url = reverse('browse-revision', url_args={'sha1_git': d['target']}) else: dir_path = d['name'] if path: dir_path = "%s/%s" % (path, d['name']) dir_url_args = dict(url_args) dir_url_args['path'] = dir_path dir_url = reverse('browse-origin-directory', url_args=dir_url_args, query_params=query_params) self.assertContains(resp, dir_url) for f in files: file_path = f['name'] if path: file_path = "%s/%s" % (path, f['name']) file_url_args = dict(url_args) file_url_args['path'] = file_path file_url = reverse('browse-origin-content', url_args=file_url_args, query_params=query_params) self.assertContains(resp, file_url) if 'path' in url_args: del url_args['path'] root_dir_branch_url = \ reverse('browse-origin-directory', url_args=url_args, query_params=query_params) nb_bc_paths = 1 if path: nb_bc_paths = len(path.split('/')) + 1 self.assertContains(resp, '
  • ', count=nb_bc_paths) self.assertContains(resp, '%s' % (root_dir_branch_url, root_directory_sha1[:7])) origin_branches_url = \ reverse('browse-origin-branches', url_args=url_args, query_params=query_params) self.assertContains(resp, 'Branches (%s)' % (origin_branches_url, len(origin_branches))) origin_releases_url = \ reverse('browse-origin-releases', url_args=url_args, query_params=query_params) self.assertContains(resp, 'Releases (%s)' % (origin_releases_url, len(origin_releases))) if path: url_args['path'] = path self.assertContains(resp, '
  • ', count=len(origin_branches)) for branch in origin_branches: query_params['branch'] = branch['name'] root_dir_branch_url = \ reverse('browse-origin-directory', url_args=url_args, query_params=query_params) self.assertContains(resp, '' % root_dir_branch_url) self.assertContains(resp, '
  • ', count=len(origin_releases)) query_params['branch'] = None for release in origin_releases: query_params['release'] = release['name'] root_dir_release_url = \ reverse('browse-origin-directory', url_args=url_args, query_params=query_params) self.assertContains(resp, '' % root_dir_release_url) self.assertContains(resp, 'vault-cook-directory') self.assertContains(resp, 'vault-cook-revision') swh_dir_id = get_swh_persistent_id('directory', directory_entries[0]['dir_id']) # noqa swh_dir_id_url = reverse('browse-swh-id', url_args={'swh_id': swh_dir_id}) self.assertContains(resp, swh_dir_id) self.assertContains(resp, swh_dir_id_url) - @patch('swh.web.browse.utils.get_origin_visits') + @patch('swh.web.common.origin_visits.get_origin_visits') @patch('swh.web.browse.utils.get_origin_visit_snapshot') @patch('swh.web.browse.utils.service') @patch('swh.web.browse.views.origin.service') def test_origin_root_directory_view(self, mock_origin_service, mock_utils_service, mock_get_origin_visit_snapshot, mock_get_origin_visits): mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.return_value = stub_origin_snapshot mock_utils_service.lookup_directory.return_value = \ stub_origin_root_directory_entries mock_utils_service.lookup_origin.return_value = stub_origin_info mock_utils_service.lookup_snapshot_size.return_value = { 'revision': len(stub_origin_snapshot[0]), 'release': len(stub_origin_snapshot[1]) } self.origin_directory_view_helper(stub_origin_info, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_root_directory_entries) self.origin_directory_view_helper(stub_origin_info, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_root_directory_entries, visit_id=stub_visit_id) self.origin_directory_view_helper(stub_origin_info, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_root_directory_entries, timestamp=stub_visit_unix_ts) self.origin_directory_view_helper(stub_origin_info, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_root_directory_entries, timestamp=stub_visit_iso_date) self.origin_directory_view_helper(stub_origin_info_no_type, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_root_directory_entries) self.origin_directory_view_helper(stub_origin_info_no_type, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_root_directory_entries, visit_id=stub_visit_id) self.origin_directory_view_helper(stub_origin_info_no_type, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_root_directory_entries, timestamp=stub_visit_unix_ts) self.origin_directory_view_helper(stub_origin_info_no_type, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_root_directory_entries, timestamp=stub_visit_iso_date) - @patch('swh.web.browse.utils.get_origin_visits') + @patch('swh.web.common.origin_visits.get_origin_visits') 
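# get_origin_visits now lives in swh.web.common.origin_visits rather than
# swh.web.browse.utils, so the tests patch it at its new import location.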
@patch('swh.web.browse.utils.get_origin_visit_snapshot') @patch('swh.web.browse.utils.service') @patch('swh.web.browse.views.utils.snapshot_context.service') def test_origin_sub_directory_view(self, mock_origin_service, mock_utils_service, mock_get_origin_visit_snapshot, mock_get_origin_visits): mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.return_value = stub_origin_snapshot mock_utils_service.lookup_directory.return_value = \ stub_origin_sub_directory_entries mock_origin_service.lookup_directory_with_path.return_value = \ {'target': stub_origin_sub_directory_entries[0]['dir_id'], 'type' : 'dir'} mock_utils_service.lookup_origin.return_value = stub_origin_info mock_utils_service.lookup_snapshot_size.return_value = { 'revision': len(stub_origin_snapshot[0]), 'release': len(stub_origin_snapshot[1]) } self.origin_directory_view_helper(stub_origin_info, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_sub_directory_entries, path=stub_origin_sub_directory_path) self.origin_directory_view_helper(stub_origin_info, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_sub_directory_entries, visit_id=stub_visit_id, path=stub_origin_sub_directory_path) self.origin_directory_view_helper(stub_origin_info, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_sub_directory_entries, timestamp=stub_visit_unix_ts, path=stub_origin_sub_directory_path) self.origin_directory_view_helper(stub_origin_info, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_sub_directory_entries, timestamp=stub_visit_iso_date, path=stub_origin_sub_directory_path) self.origin_directory_view_helper(stub_origin_info_no_type, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_sub_directory_entries, path=stub_origin_sub_directory_path) self.origin_directory_view_helper(stub_origin_info_no_type, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_sub_directory_entries, visit_id=stub_visit_id, path=stub_origin_sub_directory_path) self.origin_directory_view_helper(stub_origin_info_no_type, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_sub_directory_entries, timestamp=stub_visit_unix_ts, path=stub_origin_sub_directory_path) self.origin_directory_view_helper(stub_origin_info_no_type, stub_origin_visits, stub_origin_snapshot[0], stub_origin_snapshot[1], stub_origin_master_branch, stub_origin_root_directory_sha1, stub_origin_sub_directory_entries, timestamp=stub_visit_iso_date, path=stub_origin_sub_directory_path) @patch('swh.web.browse.views.utils.snapshot_context.request_content') - @patch('swh.web.browse.utils.get_origin_visits') + @patch('swh.web.common.origin_visits.get_origin_visits') @patch('swh.web.browse.utils.get_origin_visit_snapshot') @patch('swh.web.browse.utils.service') @patch('swh.web.browse.views.origin.service') @patch('swh.web.browse.views.utils.snapshot_context.service') @patch('swh.web.browse.views.origin.get_origin_info') def 
test_origin_request_errors(self, mock_get_origin_info, mock_snapshot_service, mock_origin_service, mock_utils_service, mock_get_origin_visit_snapshot, mock_get_origin_visits, mock_request_content): mock_get_origin_info.side_effect = \ NotFoundExc('origin not found') url = reverse('browse-origin-visits', url_args={'origin_type': 'foo', 'origin_url': 'bar'}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertContains(resp, 'origin not found', status_code=404) mock_utils_service.lookup_origin.side_effect = None mock_utils_service.lookup_origin.return_value = origin_info_test_data mock_get_origin_visits.return_value = [] url = reverse('browse-origin-directory', url_args={'origin_type': 'foo', 'origin_url': 'bar'}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertContains(resp, "No visit", status_code=404) mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.side_effect = \ NotFoundExc('visit not found') url = reverse('browse-origin-directory', url_args={'origin_type': 'foo', 'origin_url': 'bar'}, query_params={'visit_id': len(stub_origin_visits)+1}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertRegex(resp.content.decode('utf-8'), 'Visit.*not found') mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.side_effect = None mock_get_origin_visit_snapshot.return_value = stub_origin_snapshot mock_utils_service.lookup_snapshot_size.return_value = { 'revision': len(stub_origin_snapshot[0]), 'release': len(stub_origin_snapshot[1]) } mock_utils_service.lookup_directory.side_effect = \ NotFoundExc('Directory not found') url = reverse('browse-origin-directory', url_args={'origin_type': 'foo', 'origin_url': 'bar'}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertContains(resp, 'Directory not found', status_code=404) with patch('swh.web.browse.views.utils.snapshot_context.get_snapshot_context') \ as mock_get_snapshot_context: mock_get_snapshot_context.side_effect = \ NotFoundExc('Snapshot not found') url = reverse('browse-origin-directory', url_args={'origin_type': 'foo', 'origin_url': 'bar'}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertContains(resp, 'Snapshot not found', status_code=404) mock_origin_service.lookup_origin.side_effect = None mock_origin_service.lookup_origin.return_value = origin_info_test_data mock_get_origin_visits.return_value = [] url = reverse('browse-origin-content', url_args={'origin_type': 'foo', 'origin_url': 'bar', 'path': 'foo'}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertContains(resp, "No visit", status_code=404) mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.side_effect = \ NotFoundExc('visit not found') url = reverse('browse-origin-content', url_args={'origin_type': 'foo', 'origin_url': 'bar', 'path': 'foo'}, query_params={'visit_id': len(stub_origin_visits)+1}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertRegex(resp.content.decode('utf-8'), 'Visit.*not found') mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.side_effect = None 
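# An empty snapshot (no branches, no releases) for the visit should yield a
# 404 explaining that the origin has an empty list of branches.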
mock_get_origin_visit_snapshot.return_value = ([], []) url = reverse('browse-origin-content', url_args={'origin_type': 'foo', 'origin_url': 'bar', 'path': 'baz'}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertRegex(resp.content.decode('utf-8'), 'Origin.*has an empty list of branches') mock_get_origin_visit_snapshot.return_value = stub_origin_snapshot mock_snapshot_service.lookup_directory_with_path.return_value = \ {'target': stub_content_text_data['checksums']['sha1']} mock_request_content.side_effect = \ NotFoundExc('Content not found') url = reverse('browse-origin-content', url_args={'origin_type': 'foo', 'origin_url': 'bar', 'path': 'baz'}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertContains(resp, 'Content not found', status_code=404) - @patch('swh.web.browse.utils.get_origin_visits') + @patch('swh.web.common.origin_visits.get_origin_visits') @patch('swh.web.browse.utils.get_origin_visit_snapshot') @patch('swh.web.browse.utils.service') def test_origin_empty_snapshot(self, mock_utils_service, mock_get_origin_visit_snapshot, mock_get_origin_visits): mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.return_value = ([], []) mock_utils_service.lookup_snapshot_size.return_value = { 'revision': 0, 'release': 0 } url = reverse('browse-origin-directory', url_args={'origin_type': 'foo', 'origin_url': 'bar'}) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('content.html') self.assertRegex(resp.content.decode('utf-8'), 'snapshot.*is empty') def origin_branches_helper(self, origin_info, origin_snapshot): url_args = {'origin_type': origin_info['type'], 'origin_url': origin_info['url']} url = reverse('browse-origin-branches', url_args=url_args) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('branches.html') origin_branches = origin_snapshot[0] origin_releases = origin_snapshot[1] origin_branches_url = \ reverse('browse-origin-branches', url_args=url_args) self.assertContains(resp, 'Branches (%s)' % (origin_branches_url, len(origin_branches))) origin_releases_url = \ reverse('browse-origin-releases', url_args=url_args) self.assertContains(resp, 'Releases (%s)' % (origin_releases_url, len(origin_releases))) self.assertContains(resp, '' % escape(browse_branch_url)) browse_revision_url = reverse('browse-revision', url_args={'sha1_git': branch['revision']}, query_params={'origin_type': origin_info['type'], 'origin': origin_info['url']}) self.assertContains(resp, '' % escape(browse_revision_url)) @patch('swh.web.browse.views.utils.snapshot_context.process_snapshot_branches') @patch('swh.web.browse.views.utils.snapshot_context.service') - @patch('swh.web.browse.utils.get_origin_visits') + @patch('swh.web.common.origin_visits.get_origin_visits') @patch('swh.web.browse.utils.get_origin_visit_snapshot') @patch('swh.web.browse.utils.service') @patch('swh.web.browse.views.origin.service') def test_origin_branches(self, mock_origin_service, mock_utils_service, mock_get_origin_visit_snapshot, mock_get_origin_visits, mock_snp_ctx_service, mock_snp_ctx_process_branches): mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.return_value = stub_origin_snapshot mock_utils_service.lookup_origin.return_value = stub_origin_info mock_utils_service.lookup_snapshot_size.return_value = \ {'revision': 
len(stub_origin_snapshot[0]), 'release': len(stub_origin_snapshot[1])} mock_snp_ctx_service.lookup_snapshot.return_value = \ _to_snapshot_dict(branches=stub_origin_snapshot[0]) mock_snp_ctx_process_branches.return_value = stub_origin_snapshot self.origin_branches_helper(stub_origin_info, stub_origin_snapshot) self.origin_branches_helper(stub_origin_info_no_type, stub_origin_snapshot) def origin_releases_helper(self, origin_info, origin_snapshot): url_args = {'origin_type': origin_info['type'], 'origin_url': origin_info['url']} url = reverse('browse-origin-releases', url_args=url_args) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('releases.html') origin_branches = origin_snapshot[0] origin_releases = origin_snapshot[1] origin_branches_url = \ reverse('browse-origin-branches', url_args=url_args) self.assertContains(resp, 'Branches (%s)' % (origin_branches_url, len(origin_branches))) origin_releases_url = \ reverse('browse-origin-releases', url_args=url_args) self.assertContains(resp, 'Releases (%s)' % (origin_releases_url, len(origin_releases))) self.assertContains(resp, '' % escape(browse_release_url)) self.assertContains(resp, '' % escape(browse_revision_url)) @patch('swh.web.browse.views.utils.snapshot_context.process_snapshot_branches') @patch('swh.web.browse.views.utils.snapshot_context.service') - @patch('swh.web.browse.utils.get_origin_visits') + @patch('swh.web.common.origin_visits.get_origin_visits') @patch('swh.web.browse.utils.get_origin_visit_snapshot') @patch('swh.web.browse.utils.service') @patch('swh.web.browse.views.origin.service') def test_origin_releases(self, mock_origin_service, mock_utils_service, mock_get_origin_visit_snapshot, mock_get_origin_visits, mock_snp_ctx_service, mock_snp_ctx_process_branches): mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.return_value = stub_origin_snapshot mock_utils_service.lookup_origin.return_value = stub_origin_info mock_utils_service.lookup_snapshot_size.return_value = \ {'revision': len(stub_origin_snapshot[0]), 'release': len(stub_origin_snapshot[1])} mock_snp_ctx_service.lookup_snapshot.return_value = \ _to_snapshot_dict(releases=stub_origin_snapshot[1]) mock_snp_ctx_process_branches.return_value = stub_origin_snapshot self.origin_releases_helper(stub_origin_info, stub_origin_snapshot) self.origin_releases_helper(stub_origin_info_no_type, stub_origin_snapshot) diff --git a/swh/web/tests/browse/views/test_release.py b/swh/web/tests/browse/views/test_release.py index 26c4697d..62cbf557 100644 --- a/swh/web/tests/browse/views/test_release.py +++ b/swh/web/tests/browse/views/test_release.py @@ -1,111 +1,110 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information # flake8: noqa from unittest.mock import patch from swh.web.common.exc import NotFoundExc from swh.web.common.utils import ( reverse, format_utc_iso_date, get_swh_persistent_id ) from swh.web.tests.testcase import WebTestCase from .data.release_test_data import ( stub_release ) from .data.origin_test_data import stub_origin_visits class SwhBrowseReleaseTest(WebTestCase): @patch('swh.web.browse.views.release.service') @patch('swh.web.browse.utils.service') - @patch('swh.web.common.utils.service') - def test_release_browse(self, mock_service_common, mock_service_utils, + 
@patch('swh.web.common.origin_visits.get_origin_visits')
+   def test_release_browse(self, mock_get_origin_visits, mock_service_utils,
                            mock_service):
        mock_service.lookup_release.return_value = stub_release

        url = reverse('browse-release',
                      url_args={'sha1_git': stub_release['id']})

        release_id = stub_release['id']
        release_name = stub_release['name']
        author_id = stub_release['author']['id']
        author_name = stub_release['author']['name']
        author_url = reverse('browse-person',
                             url_args={'person_id': author_id})
        release_date = stub_release['date']
        message = stub_release['message']
        target_type = stub_release['target_type']
        target = stub_release['target']
        target_url = reverse('browse-revision', url_args={'sha1_git': target})
        message_lines = stub_release['message'].split('\n')

        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 200)
        self.assertTemplateUsed('browse/release.html')
        self.assertContains(resp, '<a href="%s">%s</a>' % (author_url, author_name))
        self.assertContains(resp, format_utc_iso_date(release_date))
        self.assertContains(resp, '
    %s
    %s' % (message_lines[0], '\n'.join(message_lines[1:])))
        self.assertContains(resp, release_id)
        self.assertContains(resp, release_name)
        self.assertContains(resp, target_type)
        self.assertContains(resp, '<a href="%s">%s</a>' % (target_url, target))

        swh_rel_id = get_swh_persistent_id('release', release_id)
        swh_rel_id_url = reverse('browse-swh-id',
                                 url_args={'swh_id': swh_rel_id})
        self.assertContains(resp, swh_rel_id)
        self.assertContains(resp, swh_rel_id_url)

        origin_info = {
            'id': 13706355,
            'type': 'git',
            'url': 'https://github.com/python/cpython'
        }

        mock_service_utils.lookup_origin.return_value = origin_info
-       mock_service_common.lookup_origin_visits.return_value = stub_origin_visits
-       mock_service_common.MAX_LIMIT = 20
+       mock_get_origin_visits.return_value = stub_origin_visits

        url = reverse('browse-release',
                      url_args={'sha1_git': stub_release['id']},
                      query_params={'origin': origin_info['url']})

        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 200)
        self.assertTemplateUsed('browse/release.html')
        self.assertContains(resp, author_url)
        self.assertContains(resp, author_name)
        self.assertContains(resp, format_utc_iso_date(release_date))
        self.assertContains(resp, '
    %s
    %s' % (message_lines[0], '\n'.join(message_lines[1:])))
        self.assertContains(resp, release_id)
        self.assertContains(resp, release_name)
        self.assertContains(resp, target_type)

        target_url = reverse('browse-revision', url_args={'sha1_git': target},
                             query_params={'origin': origin_info['url']})
        self.assertContains(resp, '<a href="%s">%s</a>' % (target_url, target))

        mock_service.lookup_release.side_effect = \
            NotFoundExc('Release not found')
        url = reverse('browse-release',
                      url_args={'sha1_git': 'ffff'})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertContains(resp, 'Release not found', status_code=404)
diff --git a/swh/web/tests/browse/views/test_revision.py b/swh/web/tests/browse/views/test_revision.py
index b1d0bd6c..69e58991 100644
--- a/swh/web/tests/browse/views/test_revision.py
+++ b/swh/web/tests/browse/views/test_revision.py
@@ -1,255 +1,255 @@
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

# flake8: noqa

from unittest.mock import patch, MagicMock

from django.utils.html import escape

from swh.web.common.exc import NotFoundExc
from swh.web.common.utils import (
    reverse, format_utc_iso_date, get_swh_persistent_id, parse_timestamp
)
from swh.web.tests.testcase import WebTestCase

from .data.revision_test_data import (
    revision_id_test, revision_metadata_test, revision_history_log_test
)

from .data.origin_test_data import stub_origin_visits, stub_origin_snapshot

class SwhBrowseRevisionTest(WebTestCase):

    @patch('swh.web.browse.utils.get_origin_visit_snapshot')
    @patch('swh.web.browse.views.revision.service')
    @patch('swh.web.browse.utils.service')
-   @patch('swh.web.common.utils.service')
-   def test_revision_browse(self, mock_service_common, mock_service_utils,
+   @patch('swh.web.common.origin_visits.get_origin_visits')
+   def test_revision_browse(self, mock_get_origin_visits, mock_service_utils,
                             mock_service, mock_get_origin_visit_snapshot):

        mock_service.lookup_revision.return_value = revision_metadata_test

        url = reverse('browse-revision',
                      url_args={'sha1_git': revision_id_test})

        author_id = revision_metadata_test['author']['id']
        author_name = revision_metadata_test['author']['name']
        committer_id = revision_metadata_test['committer']['id']
        committer_name = revision_metadata_test['committer']['name']
        dir_id = revision_metadata_test['directory']

        author_url = reverse('browse-person',
                             url_args={'person_id': author_id})
        committer_url = reverse('browse-person',
                                url_args={'person_id': committer_id})

        directory_url = reverse('browse-directory',
                                url_args={'sha1_git': dir_id})

        history_url = reverse('browse-revision-log',
                              url_args={'sha1_git': revision_id_test})

        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 200)
        self.assertTemplateUsed('browse/revision.html')
        self.assertContains(resp, '<a href="%s">%s</a>' % (author_url, author_name))
        self.assertContains(resp, '<a href="%s">%s</a>' % (committer_url, committer_name))
        self.assertContains(resp, directory_url)
        self.assertContains(resp, history_url)

        for parent in revision_metadata_test['parents']:
            parent_url = reverse('browse-revision',
                                 url_args={'sha1_git': parent})
            self.assertContains(resp, '<a href="%s">%s</a>' % (parent_url, parent))

        author_date = revision_metadata_test['date']
        committer_date = revision_metadata_test['committer_date']

        message_lines = revision_metadata_test['message'].split('\n')

        self.assertContains(resp,
format_utc_iso_date(author_date)) self.assertContains(resp, format_utc_iso_date(committer_date)) self.assertContains(resp, message_lines[0]) self.assertContains(resp, '\n'.join(message_lines[1:])) origin_info = { 'id': '7416001', 'type': 'git', 'url': 'https://github.com/webpack/webpack' } mock_service_utils.lookup_origin.return_value = origin_info - mock_service_common.lookup_origin_visits.return_value = stub_origin_visits + mock_get_origin_visits.return_value = stub_origin_visits mock_get_origin_visit_snapshot.return_value = stub_origin_snapshot mock_service_utils.lookup_snapshot_size.return_value = { 'revision': len(stub_origin_snapshot[0]), 'release': len(stub_origin_snapshot[1]) } - mock_service_common.MAX_LIMIT = 20 origin_directory_url = reverse('browse-origin-directory', url_args={'origin_url': origin_info['url']}, query_params={'revision': revision_id_test}) origin_revision_log_url = reverse('browse-origin-log', url_args={'origin_url': origin_info['url']}, query_params={'revision': revision_id_test}) url = reverse('browse-revision', url_args={'sha1_git': revision_id_test}, query_params={'origin': origin_info['url']}) resp = self.client.get(url) self.assertContains(resp, origin_directory_url) self.assertContains(resp, origin_revision_log_url) for parent in revision_metadata_test['parents']: parent_url = reverse('browse-revision', url_args={'sha1_git': parent}, query_params={'origin': origin_info['url']}) self.assertContains(resp, '%s' % (parent_url, parent)) self.assertContains(resp, 'vault-cook-directory') self.assertContains(resp, 'vault-cook-revision') swh_rev_id = get_swh_persistent_id('revision', revision_id_test) swh_rev_id_url = reverse('browse-swh-id', url_args={'swh_id': swh_rev_id}) self.assertContains(resp, swh_rev_id) self.assertContains(resp, swh_rev_id_url) swh_dir_id = get_swh_persistent_id('directory', dir_id) swh_dir_id_url = reverse('browse-swh-id', url_args={'swh_id': swh_dir_id}) self.assertContains(resp, swh_dir_id) self.assertContains(resp, swh_dir_id_url) @patch('swh.web.browse.views.revision.service') def test_revision_log_browse(self, mock_service): per_page = 10 revision_history_log_test_sorted = \ sorted(revision_history_log_test, key=lambda rev: -parse_timestamp(rev['committer_date']).timestamp()) mock_revs_walker = MagicMock() mock_revs_walker.__iter__.return_value = revision_history_log_test_sorted mock_revs_walker.export_state.return_value = {} mock_service.get_revisions_walker.return_value = mock_revs_walker url = reverse('browse-revision-log', url_args={'sha1_git': revision_id_test}, query_params={'per_page': per_page}) resp = self.client.get(url) next_page_url = reverse('browse-revision-log', url_args={'sha1_git': revision_id_test}, query_params={'offset': per_page, 'per_page': per_page}) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/revision-log.html') self.assertContains(resp, 'Newer') self.assertContains(resp, 'Older' % escape(next_page_url)) for log in revision_history_log_test_sorted[:per_page]: author_url = reverse('browse-person', url_args={'person_id': log['author']['id']}) revision_url = reverse('browse-revision', url_args={'sha1_git': log['id']}) self.assertContains(resp, log['id'][:7]) self.assertContains(resp, log['author']['name']) self.assertContains(resp, format_utc_iso_date(log['date'])) self.assertContains(resp, escape(log['message'])) self.assertContains(resp, format_utc_iso_date(log['committer_date'])) self.assertContains(resp, revision_url) resp = self.client.get(next_page_url) prev_page_url = 
reverse('browse-revision-log', url_args={'sha1_git': revision_id_test}, query_params={'per_page': per_page}) next_page_url = reverse('browse-revision-log', url_args={'sha1_git': revision_id_test}, query_params={'offset': 2 * per_page, 'per_page': per_page}) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/revision-log.html') self.assertContains(resp, 'Newer' % escape(prev_page_url)) self.assertContains(resp, 'Older' % escape(next_page_url)) resp = self.client.get(next_page_url) prev_page_url = reverse('browse-revision-log', url_args={'sha1_git': revision_id_test}, query_params={'offset': per_page, 'per_page': per_page}) next_page_url = reverse('browse-revision-log', url_args={'sha1_git': revision_id_test}, query_params={'offset': 3 * per_page, 'per_page': per_page}) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/revision-log.html') self.assertContains(resp, 'Newer' % escape(prev_page_url)) self.assertContains(resp, 'Older' % escape(next_page_url)) @patch('swh.web.browse.utils.service') @patch('swh.web.browse.views.revision.service') def test_revision_request_errors(self, mock_service, mock_utils_service): mock_service.lookup_revision.side_effect = \ NotFoundExc('Revision not found') url = reverse('browse-revision', url_args={'sha1_git': revision_id_test}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertContains(resp, 'Revision not found', status_code=404) mock_service.get_revisions_walker.side_effect = \ NotFoundExc('Revision not found') url = reverse('browse-revision-log', url_args={'sha1_git': revision_id_test}) resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') self.assertContains(resp, 'Revision not found', status_code=404) url = reverse('browse-revision', url_args={'sha1_git': revision_id_test}, query_params={'origin_type': 'git', 'origin': 'https://github.com/foo/bar'}) mock_service.lookup_revision.side_effect = None mock_utils_service.lookup_origin.side_effect = \ NotFoundExc('Origin not found') resp = self.client.get(url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('error.html') - self.assertContains(resp, 'Origin not found', status_code=404) + self.assertContains(resp, 'the origin mentioned in your request appears broken', + status_code=404) diff --git a/swh/web/tests/common/test_origin_visits.py b/swh/web/tests/common/test_origin_visits.py new file mode 100644 index 00000000..0194dfba --- /dev/null +++ b/swh/web/tests/common/test_origin_visits.py @@ -0,0 +1,115 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from unittest.mock import patch + +from swh.web.common.exc import NotFoundExc +from swh.web.common.origin_visits import ( + get_origin_visits, get_origin_visit +) +from swh.web.tests.testcase import WebTestCase + + +class OriginVisitsTestCase(WebTestCase): + @patch('swh.web.common.service') + def test_get_origin_visits(self, mock_service): + mock_service.MAX_LIMIT = 2 + + def _lookup_origin_visits(*args, **kwargs): + if kwargs['last_visit'] is None: + return [{'visit': 1, + 'date': '2017-05-06T00:59:10+00:00', + 'metadata': {}}, + {'visit': 2, + 'date': '2017-08-06T00:59:10+00:00', + 'metadata': {}} + ] + else: + return [{'visit': 3, + 'date': '2017-09-06T00:59:10+00:00', + 
'metadata': {}} + ] + + mock_service.lookup_origin_visits.side_effect = _lookup_origin_visits + + origin_info = { + 'id': 1, + 'type': 'git', + 'url': 'https://github.com/foo/bar', + } + + origin_visits = get_origin_visits(origin_info) + + self.assertEqual(len(origin_visits), 3) + + @patch('swh.web.common.origin_visits.get_origin_visits') + def test_get_origin_visit(self, mock_origin_visits): + origin_info = { + 'id': 2, + 'type': 'git', + 'url': 'https://github.com/foo/bar', + } + visits = \ + [{'status': 'full', + 'date': '2015-07-09T21:09:24+00:00', + 'visit': 1, + 'origin': origin_info['id']}, + {'status': 'full', + 'date': '2016-02-23T18:05:23.312045+00:00', + 'visit': 2, + 'origin': origin_info['id']}, + {'status': 'full', + 'date': '2016-03-28T01:35:06.554111+00:00', + 'visit': 3, + 'origin': origin_info['id']}, + {'status': 'full', + 'date': '2016-06-18T01:22:24.808485+00:00', + 'visit': 4, + 'origin': origin_info['id']}, + {'status': 'full', + 'date': '2016-08-14T12:10:00.536702+00:00', + 'visit': 5, + 'origin': origin_info['id']}] + mock_origin_visits.return_value = visits + + visit_id = 12 + with self.assertRaises(NotFoundExc) as cm: + visit = get_origin_visit(origin_info, + visit_id=visit_id) + exception_text = cm.exception.args[0] + self.assertIn('Visit with id %s' % visit_id, exception_text) + self.assertIn('type %s' % origin_info['type'], exception_text) + self.assertIn('url %s' % origin_info['url'], exception_text) + + visit = get_origin_visit(origin_info, visit_id=2) + self.assertEqual(visit, visits[1]) + + visit = get_origin_visit( + origin_info, visit_ts='2016-02-23T18:05:23.312045+00:00') + self.assertEqual(visit, visits[1]) + + visit = get_origin_visit( + origin_info, visit_ts='2016-02-20') + self.assertEqual(visit, visits[1]) + + visit = get_origin_visit( + origin_info, visit_ts='2016-06-18T01:22') + self.assertEqual(visit, visits[3]) + + visit = get_origin_visit( + origin_info, visit_ts='2016-06-18 01:22') + self.assertEqual(visit, visits[3]) + + visit = get_origin_visit( + origin_info, visit_ts=1466208000) + self.assertEqual(visit, visits[3]) + + visit = get_origin_visit( + origin_info, visit_ts='2014-01-01') + self.assertEqual(visit, visits[0]) + + visit = get_origin_visit( + origin_info, visit_ts='2018-01-01') + self.assertEqual(visit, visits[-1]) diff --git a/swh/web/tests/common/test_service.py b/swh/web/tests/common/test_service.py index 0f92e628..2d79ed03 100644 --- a/swh/web/tests/common/test_service.py +++ b/swh/web/tests/common/test_service.py @@ -1,1948 +1,1915 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from unittest.mock import MagicMock, patch, call from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.web.common import service from swh.web.common.exc import BadInputExc, NotFoundExc from swh.web.tests.testcase import WebTestCase class ServiceTestCase(WebTestCase): def setUp(self): self.BLAKE2S256_SAMPLE = ('685395c5dc57cada459364f0946d3dd45b' 'ad5fcbabc1048edb44380f1d31d0aa') self.BLAKE2S256_SAMPLE_BIN = hash_to_bytes(self.BLAKE2S256_SAMPLE) self.SHA1_SAMPLE = '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03' self.SHA1_SAMPLE_BIN = hash_to_bytes(self.SHA1_SAMPLE) self.SHA256_SAMPLE = ('8abb0aa566452620ecce816eecdef4792d77a' '293ad8ea82a4d5ecb4d36f7e560') self.SHA256_SAMPLE_BIN = hash_to_bytes(self.SHA256_SAMPLE) 
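# The *_BIN constants mirror each hex digest in the raw-bytes form produced
# by swh.model.hashutil.hash_to_bytes, which is what the storage API works
# with internally.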
self.SHA1GIT_SAMPLE = '25d1a2e8f32937b0f498a5ca87f823d8df013c01' self.SHA1GIT_SAMPLE_BIN = hash_to_bytes(self.SHA1GIT_SAMPLE) self.DIRECTORY_ID = '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6' self.DIRECTORY_ID_BIN = hash_to_bytes(self.DIRECTORY_ID) self.AUTHOR_ID_BIN = { 'name': b'author', 'email': b'author@company.org', } self.AUTHOR_ID = { 'name': 'author', 'email': 'author@company.org', } self.COMMITTER_ID_BIN = { 'name': b'committer', 'email': b'committer@corp.org', } self.COMMITTER_ID = { 'name': 'committer', 'email': 'committer@corp.org', } self.SAMPLE_DATE_RAW = { 'timestamp': datetime.datetime( 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc, ).timestamp(), 'offset': 0, 'negative_utc': False, } self.SAMPLE_DATE = '2000-01-17T11:23:54+00:00' self.SAMPLE_MESSAGE_BIN = b'elegant fix for bug 31415957' self.SAMPLE_MESSAGE = 'elegant fix for bug 31415957' self.SAMPLE_REVISION = { 'id': self.SHA1_SAMPLE, 'directory': self.DIRECTORY_ID, 'author': self.AUTHOR_ID, 'committer': self.COMMITTER_ID, 'message': self.SAMPLE_MESSAGE, 'date': self.SAMPLE_DATE, 'committer_date': self.SAMPLE_DATE, 'synthetic': False, 'type': 'git', 'parents': [], 'metadata': {}, 'merge': False } self.SAMPLE_REVISION_RAW = { 'id': self.SHA1_SAMPLE_BIN, 'directory': self.DIRECTORY_ID_BIN, 'author': self.AUTHOR_ID_BIN, 'committer': self.COMMITTER_ID_BIN, 'message': self.SAMPLE_MESSAGE_BIN, 'date': self.SAMPLE_DATE_RAW, 'committer_date': self.SAMPLE_DATE_RAW, 'synthetic': False, 'type': 'git', 'parents': [], 'metadata': [], } self.SAMPLE_CONTENT = { 'checksums': { 'blake2s256': self.BLAKE2S256_SAMPLE, 'sha1': self.SHA1_SAMPLE, 'sha256': self.SHA256_SAMPLE, 'sha1_git': self.SHA1GIT_SAMPLE, }, 'length': 190, 'status': 'absent' } self.SAMPLE_CONTENT_RAW = { 'blake2s256': self.BLAKE2S256_SAMPLE_BIN, 'sha1': self.SHA1_SAMPLE_BIN, 'sha256': self.SHA256_SAMPLE_BIN, 'sha1_git': self.SHA1GIT_SAMPLE_BIN, 'length': 190, 'status': 'hidden' } self.date_origin_visit1 = datetime.datetime( 2015, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc) self.origin_visit1 = { 'date': self.date_origin_visit1, 'origin': 1, 'visit': 1 } @patch('swh.web.common.service.storage') def test_lookup_multiple_hashes_ball_missing(self, mock_storage): # given mock_storage.content_missing_per_sha1 = MagicMock(return_value=[]) # when actual_lookup = service.lookup_multiple_hashes( [{'filename': 'a', 'sha1': '456caf10e9535160d90e874b45aa426de762f19f'}, {'filename': 'b', 'sha1': '745bab676c8f3cec8016e0c39ea61cf57e518865'}]) # then self.assertEqual(actual_lookup, [ {'filename': 'a', 'sha1': '456caf10e9535160d90e874b45aa426de762f19f', 'found': True}, {'filename': 'b', 'sha1': '745bab676c8f3cec8016e0c39ea61cf57e518865', 'found': True} ]) @patch('swh.web.common.service.storage') def test_lookup_multiple_hashes_some_missing(self, mock_storage): # given mock_storage.content_missing_per_sha1 = MagicMock(return_value=[ hash_to_bytes('456caf10e9535160d90e874b45aa426de762f19f') ]) # when actual_lookup = service.lookup_multiple_hashes( [{'filename': 'a', 'sha1': '456caf10e9535160d90e874b45aa426de762f19f'}, {'filename': 'b', 'sha1': '745bab676c8f3cec8016e0c39ea61cf57e518865'}]) # then self.assertEqual(actual_lookup, [ {'filename': 'a', 'sha1': '456caf10e9535160d90e874b45aa426de762f19f', 'found': False}, {'filename': 'b', 'sha1': '745bab676c8f3cec8016e0c39ea61cf57e518865', 'found': True} ]) @patch('swh.web.common.service.storage') def test_lookup_hash_does_not_exist(self, mock_storage): # given mock_storage.content_find = MagicMock(return_value=None) # when actual_lookup = 
    @patch('swh.web.common.service.storage')
    def test_lookup_hash_does_not_exist(self, mock_storage):
        # given
        mock_storage.content_find = MagicMock(return_value=None)

        # when
        actual_lookup = service.lookup_hash(
            'sha1_git:123caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual({'found': None, 'algo': 'sha1_git'}, actual_lookup)

        # check the function has been called with parameters
        mock_storage.content_find.assert_called_with(
            {'sha1_git': hash_to_bytes(
                '123caf10e9535160d90e874b45aa426de762f19f')})

    @patch('swh.web.common.service.storage')
    def test_lookup_hash_exist(self, mock_storage):
        # given
        stub_content = {
            'sha1': hash_to_bytes(
                '456caf10e9535160d90e874b45aa426de762f19f')
        }
        mock_storage.content_find = MagicMock(return_value=stub_content)

        # when
        actual_lookup = service.lookup_hash(
            'sha1:456caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual({
            'found': {
                'checksums': {
                    'sha1': '456caf10e9535160d90e874b45aa426de762f19f'
                }
            },
            'algo': 'sha1'
        }, actual_lookup)

        mock_storage.content_find.assert_called_with(
            {'sha1': hash_to_bytes(
                '456caf10e9535160d90e874b45aa426de762f19f')}
        )

    @patch('swh.web.common.service.storage')
    def test_search_hash_does_not_exist(self, mock_storage):
        # given
        mock_storage.content_find = MagicMock(return_value=None)

        # when
        actual_lookup = service.search_hash(
            'sha1_git:123caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual({'found': False}, actual_lookup)

        # check the function has been called with parameters
        mock_storage.content_find.assert_called_with(
            {'sha1_git': hash_to_bytes(
                '123caf10e9535160d90e874b45aa426de762f19f')})

    @patch('swh.web.common.service.storage')
    def test_search_hash_exist(self, mock_storage):
        # given
        stub_content = {
            'sha1': hash_to_bytes(
                '456caf10e9535160d90e874b45aa426de762f19f')
        }
        mock_storage.content_find = MagicMock(return_value=stub_content)

        # when
        actual_lookup = service.search_hash(
            'sha1:456caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual({'found': True}, actual_lookup)

        mock_storage.content_find.assert_called_with(
            {'sha1': hash_to_bytes(
                '456caf10e9535160d90e874b45aa426de762f19f')},
        )

    @patch('swh.web.common.service.idx_storage')
    def test_lookup_content_ctags(self, mock_idx_storage):
        # given
        mock_idx_storage.content_ctags_get = MagicMock(
            return_value=[{
                'id': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f'),
                'line': 100,
                'name': 'hello',
                'kind': 'function',
                'tool_name': 'ctags',
                'tool_version': 'some-version',
            }])
        expected_ctags = [{
            'id': '123caf10e9535160d90e874b45aa426de762f19f',
            'line': 100,
            'name': 'hello',
            'kind': 'function',
            'tool_name': 'ctags',
            'tool_version': 'some-version',
        }]

        # when
        actual_ctags = list(service.lookup_content_ctags(
            'sha1:123caf10e9535160d90e874b45aa426de762f19f'))

        # then
        self.assertEqual(actual_ctags, expected_ctags)

        mock_idx_storage.content_ctags_get.assert_called_with(
            [hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f')])

    @patch('swh.web.common.service.idx_storage')
    def test_lookup_content_ctags_no_hash(self, mock_idx_storage):
        # given
        mock_idx_storage.content_ctags_get = MagicMock(return_value=[])

        # when
        actual_ctags = list(service.lookup_content_ctags(
            'sha1:123caf10e9535160d90e874b45aa426de762f19f'))

        # then
        self.assertEqual(actual_ctags, [])

    @patch('swh.web.common.service.idx_storage')
    def test_lookup_content_filetype(self, mock_idx_storage):
        # given
        mock_idx_storage.content_mimetype_get = MagicMock(
            return_value=[{
                'id': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f'),
                'mimetype': 'text/x-c++',
                'encoding': 'us-ascii',
            }])
        expected_filetype = {
            'id': '123caf10e9535160d90e874b45aa426de762f19f',
            'mimetype': 'text/x-c++',
            'encoding': 'us-ascii',
        }

        # when
        actual_filetype = service.lookup_content_filetype(
            'sha1:123caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual(actual_filetype, expected_filetype)

        mock_idx_storage.content_mimetype_get.assert_called_with(
            [hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f')])

    @patch('swh.web.common.service.idx_storage')
    @patch('swh.web.common.service.storage')
    def test_lookup_content_filetype_2(self, mock_storage, mock_idx_storage):
        # given
        mock_storage.content_find = MagicMock(
            return_value={
                'sha1': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f')
            }
        )
        mock_idx_storage.content_mimetype_get = MagicMock(
            return_value=[{
                'id': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f'),
                'mimetype': 'text/x-python',
                'encoding': 'us-ascii',
            }]
        )
        expected_filetype = {
            'id': '123caf10e9535160d90e874b45aa426de762f19f',
            'mimetype': 'text/x-python',
            'encoding': 'us-ascii',
        }

        # when
        actual_filetype = service.lookup_content_filetype(
            'sha1_git:456caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual(actual_filetype, expected_filetype)

        mock_storage.content_find.assert_called_with(
            {'sha1_git': hash_to_bytes(
                '456caf10e9535160d90e874b45aa426de762f19f')})
        mock_idx_storage.content_mimetype_get.assert_called_with(
            [hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f')])

    @patch('swh.web.common.service.idx_storage')
    def test_lookup_content_language(self, mock_idx_storage):
        # given
        mock_idx_storage.content_language_get = MagicMock(
            return_value=[{
                'id': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f'),
                'lang': 'python',
            }])
        expected_language = {
            'id': '123caf10e9535160d90e874b45aa426de762f19f',
            'lang': 'python',
        }

        # when
        actual_language = service.lookup_content_language(
            'sha1:123caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual(actual_language, expected_language)

        mock_idx_storage.content_language_get.assert_called_with(
            [hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f')])

    @patch('swh.web.common.service.idx_storage')
    @patch('swh.web.common.service.storage')
    def test_lookup_content_language_2(self, mock_storage, mock_idx_storage):
        # given
        mock_storage.content_find = MagicMock(
            return_value={
                'sha1': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f')
            }
        )
        mock_idx_storage.content_language_get = MagicMock(
            return_value=[{
                'id': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f'),
                'lang': 'haskell',
            }]
        )
        expected_language = {
            'id': '123caf10e9535160d90e874b45aa426de762f19f',
            'lang': 'haskell',
        }

        # when
        actual_language = service.lookup_content_language(
            'sha1_git:456caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual(actual_language, expected_language)

        mock_storage.content_find.assert_called_with(
            {'sha1_git': hash_to_bytes(
                '456caf10e9535160d90e874b45aa426de762f19f')})
        mock_idx_storage.content_language_get.assert_called_with(
            [hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f')])

    @patch('swh.web.common.service.idx_storage')
    def test_lookup_expression(self, mock_idx_storage):
        # given
        mock_idx_storage.content_ctags_search = MagicMock(
            return_value=[{
                'id': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f'),
                'name': 'foobar',
                'kind': 'variable',
                'lang': 'C',
                'line': 10
            }])
        expected_ctags = [{
            'sha1': '123caf10e9535160d90e874b45aa426de762f19f',
            'name': 'foobar',
            'kind': 'variable',
            'lang': 'C',
            'line': 10
        }]

        # when
        actual_ctags = list(service.lookup_expression(
            'foobar', last_sha1='hash', per_page=10))

        # then
        self.assertEqual(actual_ctags, expected_ctags)

        mock_idx_storage.content_ctags_search.assert_called_with(
            'foobar', last_sha1='hash', limit=10)

    @patch('swh.web.common.service.idx_storage')
    def test_lookup_expression_no_result(self, mock_idx_storage):
        # given
        mock_idx_storage.content_ctags_search = MagicMock(
            return_value=[])
        expected_ctags = []

        # when
        actual_ctags = list(service.lookup_expression(
            'barfoo', last_sha1='hash', per_page=10))

        # then
        self.assertEqual(actual_ctags, expected_ctags)

        mock_idx_storage.content_ctags_search.assert_called_with(
            'barfoo', last_sha1='hash', limit=10)

    @patch('swh.web.common.service.idx_storage')
    def test_lookup_content_license(self, mock_idx_storage):
        # given
        mock_idx_storage.content_fossology_license_get = MagicMock(
            return_value=[{
                hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f'): [{
                    'licenses': ['GPL-3.0+'],
                    'tool': {}
                }]
            }])
        expected_license = {
            'id': '123caf10e9535160d90e874b45aa426de762f19f',
            'facts': [{
                'licenses': ['GPL-3.0+'],
                'tool': {}
            }]
        }

        # when
        actual_license = service.lookup_content_license(
            'sha1:123caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual(actual_license, expected_license)

        mock_idx_storage.content_fossology_license_get.assert_called_with(
            [hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f')])

    @patch('swh.web.common.service.idx_storage')
    @patch('swh.web.common.service.storage')
    def test_lookup_content_license_2(self, mock_storage, mock_idx_storage):
        # given
        mock_storage.content_find = MagicMock(
            return_value={
                'sha1': hash_to_bytes(
                    '123caf10e9535160d90e874b45aa426de762f19f')
            }
        )
        mock_idx_storage.content_fossology_license_get = MagicMock(
            return_value=[{
                hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f'): [{
                    'licenses': ['BSD-2-Clause'],
                    'tool': {}
                }]
            }]
        )
        expected_license = {
            'id': '123caf10e9535160d90e874b45aa426de762f19f',
            'facts': [{
                'licenses': ['BSD-2-Clause'],
                'tool': {}
            }]
        }

        # when
        actual_license = service.lookup_content_license(
            'sha1_git:456caf10e9535160d90e874b45aa426de762f19f')

        # then
        self.assertEqual(actual_license, expected_license)

        mock_storage.content_find.assert_called_with(
            {'sha1_git': hash_to_bytes(
                '456caf10e9535160d90e874b45aa426de762f19f')})
        mock_idx_storage.content_fossology_license_get.assert_called_with(
            [hash_to_bytes('123caf10e9535160d90e874b45aa426de762f19f')])

    @patch('swh.web.common.service.storage')
    def test_stat_counters(self, mock_storage):
        # given
        input_stats = {
            "content": 1770830,
            "directory": 211683,
            "directory_entry_dir": 209167,
            "directory_entry_file": 1807094,
            "directory_entry_rev": 0,
            "origin": 1096,
            "person": 0,
            "release": 8584,
            "revision": 7792,
            "revision_history": 0,
            "skipped_content": 0
        }
        mock_storage.stat_counters = MagicMock(return_value=input_stats)

        # when
        actual_stats = service.stat_counters()

        # then
        expected_stats = input_stats
        self.assertEqual(actual_stats, expected_stats)

        mock_storage.stat_counters.assert_called_with()
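# A minimal sketch (not part of this change): the expected values in the
# origin-visit tests that follow assume a serialization step turning raw
# visit dicts into JSON-friendly ones (isoformat dates, hex targets, decoded
# branch names, None metadata coerced to {}). The helper name and exact
# rules here are assumptions inferred from those expectations:

from swh.model.hashutil import hash_to_hex


def serialize_visit(visit):
    """Return a JSON-serializable copy of a raw origin visit dict."""
    out = dict(visit)
    out['date'] = visit['date'].isoformat()
    if 'target' in out:
        out['target'] = hash_to_hex(out['target'])
    if 'branch' in out:
        out['branch'] = out['branch'].decode('utf-8')
    if 'metadata' in out:
        out['metadata'] = out['metadata'] or {}
    return out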
    @patch('swh.web.common.service._lookup_origin_visits')
    def test_lookup_origin_visits(self, mock_lookup_visits):
        # given
        date_origin_visit2 = datetime.datetime(
            2013, 7, 1, 20, 0, 0,
            tzinfo=datetime.timezone.utc)
        date_origin_visit3 = datetime.datetime(
            2015, 1, 1, 21, 0, 0,
            tzinfo=datetime.timezone.utc)
        stub_result = [self.origin_visit1, {
            'date': date_origin_visit2,
            'origin': 1,
            'visit': 2,
            'target': hash_to_bytes(
                '65a55bbdf3629f916219feb3dcc7393ded1bc8db'),
            'branch': b'master',
            'target_type': 'release',
            'metadata': None,
        }, {
            'date': date_origin_visit3,
            'origin': 1,
            'visit': 3
        }]
        mock_lookup_visits.return_value = stub_result

        # when
        expected_origin_visits = [{
            'date': self.origin_visit1['date'].isoformat(),
            'origin': self.origin_visit1['origin'],
            'visit': self.origin_visit1['visit']
        }, {
            'date': date_origin_visit2.isoformat(),
            'origin': 1,
            'visit': 2,
            'target': '65a55bbdf3629f916219feb3dcc7393ded1bc8db',
            'branch': 'master',
            'target_type': 'release',
            'metadata': {},
        }, {
            'date': date_origin_visit3.isoformat(),
            'origin': 1,
            'visit': 3
        }]

        actual_origin_visits = service.lookup_origin_visits(6)

        # then
        self.assertEqual(list(actual_origin_visits), expected_origin_visits)

        mock_lookup_visits.assert_called_once_with(
            6, last_visit=None, limit=10)

    @patch('swh.web.common.service.storage')
    def test_lookup_origin_visit(self, mock_storage):
        # given
        stub_result = self.origin_visit1
        mock_storage.origin_visit_get_by.return_value = stub_result

        expected_origin_visit = {
            'date': self.origin_visit1['date'].isoformat(),
            'origin': self.origin_visit1['origin'],
            'visit': self.origin_visit1['visit']
        }

        # when
        actual_origin_visit = service.lookup_origin_visit(1, 1)

        # then
        self.assertEqual(actual_origin_visit, expected_origin_visit)

        mock_storage.origin_visit_get_by.assert_called_once_with(1, 1)

    @patch('swh.web.common.service.storage')
    def test_lookup_origin(self, mock_storage):
        # given
        mock_storage.origin_get = MagicMock(return_value={
            'id': 'origin-id',
            'url': 'ftp://some/url/to/origin',
            'type': 'ftp'})

        # when
        actual_origin = service.lookup_origin({'id': 'origin-id'})

        # then
        self.assertEqual(actual_origin, {'id': 'origin-id',
                                         'url': 'ftp://some/url/to/origin',
                                         'type': 'ftp'})

        mock_storage.origin_get.assert_called_with({'id': 'origin-id'})

    @patch('swh.web.common.service.storage')
    def test_lookup_release_ko_id_checksum_not_a_sha1(self, mock_storage):
        # given
        mock_storage.release_get = MagicMock()

        with self.assertRaises(BadInputExc) as cm:
            # when
            service.lookup_release('not-a-sha1')
        self.assertIn('invalid checksum', cm.exception.args[0].lower())

        # then
        self.assertFalse(mock_storage.release_get.called)

    @patch('swh.web.common.service.storage')
    def test_lookup_release_ko_id_checksum_too_long(self, mock_storage):
        # given
        mock_storage.release_get = MagicMock()

        # when
        with self.assertRaises(BadInputExc) as cm:
            service.lookup_release(
                '13c1d34d138ec13b5ebad226dc2528dc7506c956e4646f62d4daf5'
                '1aea892abe')
        self.assertEqual('Only sha1_git is supported.', cm.exception.args[0])

        # then
        self.assertFalse(mock_storage.release_get.called)

    @patch('swh.web.common.service.storage')
    def test_lookup_directory_with_path_not_found(self, mock_storage):
        # given
        mock_storage.lookup_directory_with_path = MagicMock(return_value=None)

        sha1_git = '65a55bbdf3629f916219feb3dcc7393ded1bc8db'

        # when
        actual_directory = mock_storage.lookup_directory_with_path(
            sha1_git, 'some/path/here')

        self.assertIsNone(actual_directory)

    @patch('swh.web.common.service.storage')
    def test_lookup_directory_with_path_found(self, mock_storage):
        # given
        sha1_git = '65a55bbdf3629f916219feb3dcc7393ded1bc8db'
        entry = {'id': 'dir-id',
                 'type': 'dir',
                 'name': 'some/path/foo'}

        mock_storage.lookup_directory_with_path = MagicMock(return_value=entry)

        # when
        actual_directory = mock_storage.lookup_directory_with_path(
            sha1_git, 'some/path/here')

        self.assertEqual(entry, actual_directory)

    @patch('swh.web.common.service.storage')
    def test_lookup_release(self, mock_storage):
        # given
        mock_storage.release_get = MagicMock(return_value=[{
            'id': hash_to_bytes('65a55bbdf3629f916219feb3dcc7393ded1bc8db'),
            'target': None,
            'date': {
                'timestamp': datetime.datetime(
                    2015, 1, 1, 22, 0, 0,
                    tzinfo=datetime.timezone.utc).timestamp(),
                'offset': 0,
                'negative_utc': True,
            },
            'name': b'v0.0.1',
            'message': b'synthetic release',
            'synthetic': True,
        }])

        # when
        actual_release = service.lookup_release(
            '65a55bbdf3629f916219feb3dcc7393ded1bc8db')

        # then
        self.assertEqual(actual_release, {
            'id': '65a55bbdf3629f916219feb3dcc7393ded1bc8db',
            'target': None,
            'date': '2015-01-01T22:00:00-00:00',
            'name': 'v0.0.1',
            'message': 'synthetic release',
            'synthetic': True,
        })
        mock_storage.release_get.assert_called_with(
            [hash_to_bytes('65a55bbdf3629f916219feb3dcc7393ded1bc8db')])

    def test_lookup_revision_with_context_ko_not_a_sha1_1(self):
        # given
        sha1_git = '13c1d34d138ec13b5ebad226dc2528dc7506c956e4646f62d4' \
                   'daf51aea892abe'
        sha1_git_root = '65a55bbdf3629f916219feb3dcc7393ded1bc8db'

        # when
        with self.assertRaises(BadInputExc) as cm:
            service.lookup_revision_with_context(sha1_git_root, sha1_git)
        self.assertIn('Only sha1_git is supported', cm.exception.args[0])

    def test_lookup_revision_with_context_ko_not_a_sha1_2(self):
        # given
        sha1_git_root = '65a55bbdf3629f916219feb3dcc7393ded1bc8db'
        sha1_git = '13c1d34d138ec13b5ebad226dc2528dc7506c956e4646f6' \
                   '2d4daf51aea892abe'

        # when
        with self.assertRaises(BadInputExc) as cm:
            service.lookup_revision_with_context(sha1_git_root, sha1_git)
        self.assertIn('Only sha1_git is supported', cm.exception.args[0])

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_with_context_ko_sha1_git_does_not_exist(
            self, mock_storage):
        # given
        sha1_git_root = '65a55bbdf3629f916219feb3dcc7393ded1bc8db'
        sha1_git = '777777bdf3629f916219feb3dcc7393ded1bc8db'

        sha1_git_bin = hash_to_bytes(sha1_git)

        mock_storage.revision_get.return_value = None

        # when
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_revision_with_context(sha1_git_root, sha1_git)
        self.assertIn('Revision 777777bdf3629f916219feb3dcc7393ded1bc8db'
                      ' not found', cm.exception.args[0])

        mock_storage.revision_get.assert_called_once_with(
            [sha1_git_bin])

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_with_context_ko_root_sha1_git_does_not_exist(
            self, mock_storage):
        # given
        sha1_git_root = '65a55bbdf3629f916219feb3dcc7393ded1bc8db'
        sha1_git = '777777bdf3629f916219feb3dcc7393ded1bc8db'

        sha1_git_root_bin = hash_to_bytes(sha1_git_root)
        sha1_git_bin = hash_to_bytes(sha1_git)

        mock_storage.revision_get.side_effect = ['foo', None]

        # when
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_revision_with_context(sha1_git_root, sha1_git)
        self.assertIn('Revision root 65a55bbdf3629f916219feb3dcc7393ded1bc8db'
                      ' not found', cm.exception.args[0])

        mock_storage.revision_get.assert_has_calls([call([sha1_git_bin]),
                                                    call([sha1_git_root_bin])])

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_revision_with_context(self, mock_query, mock_storage):
        # given
        sha1_git_root = '666'
        sha1_git = '883'

        sha1_git_root_bin = b'666'
        sha1_git_bin = b'883'

        sha1_git_root_dict = {
            'id': sha1_git_root_bin,
            'parents': [b'999'],
        }
        sha1_git_dict = {
            'id': sha1_git_bin,
            'parents': [],
            'directory': b'278',
        }

        stub_revisions = [
            sha1_git_root_dict,
            {
                'id': b'999',
                'parents': [b'777', b'883', b'888'],
            },
            {
                'id': b'777',
                'parents': [b'883'],
            },
            sha1_git_dict,
            {
                'id': b'888',
                'parents': [b'889'],
            },
            {
                'id': b'889',
                'parents': [],
            },
        ]

        # inputs ok
        mock_query.parse_hash_with_algorithms_or_throws.side_effect = [
            ('sha1', sha1_git_bin),
            ('sha1', sha1_git_root_bin)
        ]

        # lookup revision first 883, then 666 (both exist)
        mock_storage.revision_get.return_value = [
            sha1_git_dict,
            sha1_git_root_dict
        ]

        mock_storage.revision_log = MagicMock(
            return_value=stub_revisions)

        # when
        actual_revision = service.lookup_revision_with_context(
            sha1_git_root, sha1_git)

        # then
        self.assertEqual(actual_revision, {
            'id': hash_to_hex(sha1_git_bin),
            'parents': [],
            'children': [hash_to_hex(b'999'), hash_to_hex(b'777')],
            'directory': hash_to_hex(b'278'),
            'merge': False
        })

        mock_query.parse_hash_with_algorithms_or_throws.assert_has_calls(
            [call(sha1_git, ['sha1'], 'Only sha1_git is supported.'),
             call(sha1_git_root, ['sha1'], 'Only sha1_git is supported.')])

        mock_storage.revision_log.assert_called_with(
            [sha1_git_root_bin], 100)

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_revision_with_context_retrieved_as_dict(
            self, mock_query, mock_storage):
        # given
        sha1_git = '883'

        sha1_git_root_bin = b'666'
        sha1_git_bin = b'883'

        sha1_git_root_dict = {
            'id': sha1_git_root_bin,
            'parents': [b'999'],
        }

        sha1_git_dict = {
            'id': sha1_git_bin,
            'parents': [],
            'directory': b'278',
        }

        stub_revisions = [
            sha1_git_root_dict,
            {
                'id': b'999',
                'parents': [b'777', b'883', b'888'],
            },
            {
                'id': b'777',
                'parents': [b'883'],
            },
            sha1_git_dict,
            {
                'id': b'888',
                'parents': [b'889'],
            },
            {
                'id': b'889',
                'parents': [],
            },
        ]

        # inputs ok
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', sha1_git_bin)

        # lookup only on sha1
        mock_storage.revision_get.return_value = [sha1_git_dict]

        mock_storage.revision_log.return_value = stub_revisions

        # when
        actual_revision = service.lookup_revision_with_context(
            {'id': sha1_git_root_bin}, sha1_git)

        # then
        self.assertEqual(actual_revision, {
            'id': hash_to_hex(sha1_git_bin),
            'parents': [],
            'children': [hash_to_hex(b'999'), hash_to_hex(b'777')],
            'directory': hash_to_hex(b'278'),
            'merge': False
        })

        mock_query.parse_hash_with_algorithms_or_throws.assert_called_once_with(  # noqa
            sha1_git, ['sha1'], 'Only sha1_git is supported.')

        mock_storage.revision_get.assert_called_once_with([sha1_git_bin])

        mock_storage.revision_log.assert_called_with(
            [sha1_git_root_bin], 100)
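# A minimal sketch (not part of this change): the two
# lookup_revision_with_context tests above expect 'children' to be derived
# by inverting the 'parents' links of the root revision's log. With log
# entries shaped like stub_revisions, that inversion can be written as:

from collections import defaultdict


def children_from_log(revision_log):
    """Map each revision id to the ids of revisions listing it as a parent."""
    children = defaultdict(list)
    for rev in revision_log:
        for parent in rev['parents']:
            children[parent].append(rev['id'])
    return children


# children_from_log(stub_revisions)[b'883'] == [b'999', b'777'], matching
# the 'children' value asserted above once hex-encoded.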
    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory_with_revision_not_found(self,
                                                      mock_query,
                                                      mock_storage):
        # given
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', b'123')
        mock_storage.revision_get.return_value = None

        # when
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_directory_with_revision('123')
        self.assertIn('Revision 123 not found', cm.exception.args[0])

        mock_query.parse_hash_with_algorithms_or_throws.assert_called_once_with(  # noqa
            '123', ['sha1'], 'Only sha1_git is supported.')
        mock_storage.revision_get.assert_called_once_with([b'123'])

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory_with_revision_ko_revision_with_path_to_nowhere(
            self, mock_query, mock_storage):
        # given
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', b'123')

        dir_id = b'dir-id-as-sha1'
        mock_storage.revision_get.return_value = [{
            'directory': dir_id,
        }]

        mock_storage.directory_entry_get_by_path.return_value = None

        # when
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_directory_with_revision(
                '123', 'path/to/something/unknown')
        exception_text = cm.exception.args[0].lower()
        self.assertIn('directory or file', exception_text)
        self.assertIn('path/to/something/unknown', exception_text)
        self.assertIn('revision 123', exception_text)
        self.assertIn('not found', exception_text)

        mock_query.parse_hash_with_algorithms_or_throws.assert_called_once_with(  # noqa
            '123', ['sha1'], 'Only sha1_git is supported.')
        mock_storage.revision_get.assert_called_once_with([b'123'])
        mock_storage.directory_entry_get_by_path.assert_called_once_with(
            b'dir-id-as-sha1', [b'path', b'to', b'something', b'unknown'])

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory_with_revision_ko_type_not_implemented(
            self, mock_query, mock_storage):
        # given
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', b'123')

        dir_id = b'dir-id-as-sha1'
        mock_storage.revision_get.return_value = [{
            'directory': dir_id,
        }]

        mock_storage.directory_entry_get_by_path.return_value = {
            'type': 'rev',
            'name': b'some/path/to/rev',
            'target': b'456'
        }

        stub_content = {
            'id': b'12',
            'type': 'file'
        }

        mock_storage.content_get.return_value = stub_content

        # when
        with self.assertRaises(NotImplementedError) as cm:
            service.lookup_directory_with_revision(
                '123', 'some/path/to/rev')
        self.assertIn("Entity of type rev not implemented.",
                      cm.exception.args[0])

        # then
        mock_query.parse_hash_with_algorithms_or_throws.assert_called_once_with(  # noqa
            '123', ['sha1'], 'Only sha1_git is supported.')
        mock_storage.revision_get.assert_called_once_with([b'123'])
        mock_storage.directory_entry_get_by_path.assert_called_once_with(
            b'dir-id-as-sha1', [b'some', b'path', b'to', b'rev'])

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory_with_revision_revision_without_path(
            self, mock_query, mock_storage):
        # given
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', b'123')

        dir_id = b'dir-id-as-sha1'
        mock_storage.revision_get.return_value = [{
            'directory': dir_id,
        }]

        stub_dir_entries = [{
            'id': b'123',
            'type': 'dir'
        }, {
            'id': b'456',
            'type': 'file'
        }]

        mock_storage.directory_ls.return_value = stub_dir_entries

        # when
        actual_directory_entries = service.lookup_directory_with_revision(
            '123')

        self.assertEqual(actual_directory_entries['type'], 'dir')
        self.assertEqual(list(actual_directory_entries['content']),
                         stub_dir_entries)

        mock_query.parse_hash_with_algorithms_or_throws.assert_called_once_with(  # noqa
            '123', ['sha1'], 'Only sha1_git is supported.')
        mock_storage.revision_get.assert_called_once_with([b'123'])
        mock_storage.directory_ls.assert_called_once_with(dir_id)

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory_with_revision_with_path_to_dir(self,
                                                             mock_query,
                                                             mock_storage):
        # given
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', b'123')

        dir_id = b'dir-id-as-sha1'
        mock_storage.revision_get.return_value = [{
            'directory': dir_id,
        }]

        stub_dir_entries = [{
            'id': b'12',
            'type': 'dir'
        }, {
            'id': b'34',
            'type': 'file'
        }]

        mock_storage.directory_entry_get_by_path.return_value = {
            'type': 'dir',
            'name': b'some/path',
            'target': b'456'
        }
        mock_storage.directory_ls.return_value = stub_dir_entries

        # when
        actual_directory_entries = service.lookup_directory_with_revision(
            '123', 'some/path')

        self.assertEqual(actual_directory_entries['type'], 'dir')
        self.assertEqual(actual_directory_entries['revision'], '123')
        self.assertEqual(actual_directory_entries['path'], 'some/path')
        self.assertEqual(list(actual_directory_entries['content']),
                         stub_dir_entries)

        mock_query.parse_hash_with_algorithms_or_throws.assert_called_once_with(  # noqa
            '123', ['sha1'], 'Only sha1_git is supported.')
        mock_storage.revision_get.assert_called_once_with([b'123'])
        mock_storage.directory_entry_get_by_path.assert_called_once_with(
            dir_id, [b'some', b'path'])
        mock_storage.directory_ls.assert_called_once_with(b'456')

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory_with_revision_with_path_to_file_wo_data(
            self, mock_query, mock_storage):
        # given
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', b'123')

        dir_id = b'dir-id-as-sha1'
        mock_storage.revision_get.return_value = [{
            'directory': dir_id,
        }]

        mock_storage.directory_entry_get_by_path.return_value = {
            'type': 'file',
            'name': b'some/path/to/file',
            'target': b'789'
        }

        stub_content = {
            'status': 'visible',
        }

        mock_storage.content_find.return_value = stub_content

        # when
        actual_content = service.lookup_directory_with_revision(
            '123', 'some/path/to/file')

        # then
        self.assertEqual(actual_content, {'type': 'file',
                                          'revision': '123',
                                          'path': 'some/path/to/file',
                                          'content': stub_content})

        mock_query.parse_hash_with_algorithms_or_throws.assert_called_once_with(  # noqa
            '123', ['sha1'], 'Only sha1_git is supported.')
        mock_storage.revision_get.assert_called_once_with([b'123'])
        mock_storage.directory_entry_get_by_path.assert_called_once_with(
            b'dir-id-as-sha1', [b'some', b'path', b'to', b'file'])
        mock_storage.content_find.assert_called_once_with({'sha1_git': b'789'})

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory_with_revision_with_path_to_file_w_data(
            self, mock_query, mock_storage):
        # given
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', b'123')

        dir_id = b'dir-id-as-sha1'
        mock_storage.revision_get.return_value = [{
            'directory': dir_id,
        }]

        mock_storage.directory_entry_get_by_path.return_value = {
            'type': 'file',
            'name': b'some/path/to/file',
            'target': b'789'
        }

        stub_content = {
            'status': 'visible',
            'sha1': b'content-sha1'
        }

        mock_storage.content_find.return_value = stub_content
        mock_storage.content_get.return_value = [{
            'sha1': b'content-sha1',
            'data': b'some raw data'
        }]

        expected_content = {
            'status': 'visible',
            'checksums': {
                'sha1': hash_to_hex(b'content-sha1'),
            },
            'data': b'some raw data'
        }

        # when
        actual_content = service.lookup_directory_with_revision(
            '123', 'some/path/to/file', with_data=True)

        # then
        self.assertEqual(actual_content, {'type': 'file',
                                          'revision': '123',
                                          'path': 'some/path/to/file',
                                          'content': expected_content})

        mock_query.parse_hash_with_algorithms_or_throws.assert_called_once_with(  # noqa
            '123', ['sha1'], 'Only sha1_git is supported.')
        mock_storage.revision_get.assert_called_once_with([b'123'])
        mock_storage.directory_entry_get_by_path.assert_called_once_with(
            b'dir-id-as-sha1', [b'some', b'path', b'to', b'file'])
        mock_storage.content_find.assert_called_once_with({'sha1_git': b'789'})
        mock_storage.content_get.assert_called_once_with([b'content-sha1'])

    @patch('swh.web.common.service.storage')
    def test_lookup_revision(self, mock_storage):
        # given
        mock_storage.revision_get = MagicMock(
            return_value=[self.SAMPLE_REVISION_RAW])

        # when
        actual_revision = service.lookup_revision(
            self.SHA1_SAMPLE)

        # then
        self.assertEqual(actual_revision, self.SAMPLE_REVISION)

        mock_storage.revision_get.assert_called_with(
            [self.SHA1_SAMPLE_BIN])

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_invalid_msg(self, mock_storage):
        # given
        stub_rev = self.SAMPLE_REVISION_RAW
        stub_rev['message'] = b'elegant fix for bug \xff'

        expected_revision = self.SAMPLE_REVISION
        expected_revision['message'] = None
        expected_revision['message_decoding_failed'] = True
        mock_storage.revision_get = MagicMock(return_value=[stub_rev])

        # when
        actual_revision = service.lookup_revision(
            self.SHA1_SAMPLE)

        # then
        self.assertEqual(actual_revision, expected_revision)

        mock_storage.revision_get.assert_called_with(
            [self.SHA1_SAMPLE_BIN])

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_msg_ok(self, mock_storage):
        # given
        mock_storage.revision_get.return_value = [self.SAMPLE_REVISION_RAW]

        # when
        rv = service.lookup_revision_message(
            self.SHA1_SAMPLE)

        # then
        self.assertEqual(rv, {'message': self.SAMPLE_MESSAGE_BIN})
        mock_storage.revision_get.assert_called_with(
            [self.SHA1_SAMPLE_BIN])

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_msg_absent(self, mock_storage):
        # given
        stub_revision = self.SAMPLE_REVISION_RAW
        del stub_revision['message']
        mock_storage.revision_get.return_value = [stub_revision]

        # when
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_revision_message(
                self.SHA1_SAMPLE)

        # then
        mock_storage.revision_get.assert_called_with(
            [self.SHA1_SAMPLE_BIN])
        self.assertEqual(
            cm.exception.args[0],
            'No message for revision with sha1_git %s.' % self.SHA1_SAMPLE,
        )

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_msg_norev(self, mock_storage):
        # given
        mock_storage.revision_get.return_value = None

        # when
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_revision_message(
                self.SHA1_SAMPLE)

        # then
        mock_storage.revision_get.assert_called_with(
            [self.SHA1_SAMPLE_BIN])
        self.assertEqual(
            cm.exception.args[0],
            'Revision with sha1_git %s not found.' % self.SHA1_SAMPLE,
        )

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_multiple(self, mock_storage):
        # given
        sha1 = self.SHA1_SAMPLE
        sha1_other = 'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc'

        stub_revisions = [
            self.SAMPLE_REVISION_RAW,
            {
                'id': hash_to_bytes(sha1_other),
                'directory': 'abcdbe353ed3480476f032475e7c233eff7371d5',
                'author': {
                    'name': b'name',
                    'email': b'name@surname.org',
                },
                'committer': {
                    'name': b'name',
                    'email': b'name@surname.org',
                },
                'message': b'ugly fix for bug 42',
                'date': {
                    'timestamp': datetime.datetime(
                        2000, 1, 12, 5, 23, 54,
                        tzinfo=datetime.timezone.utc).timestamp(),
                    'offset': 0,
                    'negative_utc': False
                },
                'date_offset': 0,
                'committer_date': {
                    'timestamp': datetime.datetime(
                        2000, 1, 12, 5, 23, 54,
                        tzinfo=datetime.timezone.utc).timestamp(),
                    'offset': 0,
                    'negative_utc': False
                },
                'committer_date_offset': 0,
                'synthetic': False,
                'type': 'git',
                'parents': [],
                'metadata': [],
            }
        ]

        mock_storage.revision_get.return_value = stub_revisions

        # when
        actual_revisions = service.lookup_revision_multiple(
            [sha1, sha1_other])

        # then
        self.assertEqual(list(actual_revisions), [
            self.SAMPLE_REVISION,
            {
                'id': sha1_other,
                'directory': 'abcdbe353ed3480476f032475e7c233eff7371d5',
                'author': {
                    'name': 'name',
                    'email': 'name@surname.org',
                },
                'committer': {
                    'name': 'name',
                    'email': 'name@surname.org',
                },
                'message': 'ugly fix for bug 42',
                'date': '2000-01-12T05:23:54+00:00',
                'date_offset': 0,
                'committer_date': '2000-01-12T05:23:54+00:00',
                'committer_date_offset': 0,
                'synthetic': False,
                'type': 'git',
                'parents': [],
                'metadata': {},
                'merge': False
            }
        ])

        self.assertEqual(
            list(mock_storage.revision_get.call_args[0][0]),
            [hash_to_bytes(sha1), hash_to_bytes(sha1_other)])

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_multiple_none_found(self, mock_storage):
        # given
        sha1_bin = self.SHA1_SAMPLE
        sha1_other = 'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc'

        mock_storage.revision_get.return_value = []

        # then
        actual_revisions = service.lookup_revision_multiple(
            [sha1_bin, sha1_other])

        self.assertEqual(list(actual_revisions), [])

        self.assertEqual(
            list(mock_storage.revision_get.call_args[0][0]),
            [hash_to_bytes(self.SHA1_SAMPLE), hash_to_bytes(sha1_other)])

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_log(self, mock_storage):
        # given
        stub_revision_log = [self.SAMPLE_REVISION_RAW]
        mock_storage.revision_log = MagicMock(return_value=stub_revision_log)

        # when
        actual_revision = service.lookup_revision_log(
            'abcdbe353ed3480476f032475e7c233eff7371d5',
            limit=25)

        # then
        self.assertEqual(list(actual_revision), [self.SAMPLE_REVISION])

        mock_storage.revision_log.assert_called_with(
            [hash_to_bytes('abcdbe353ed3480476f032475e7c233eff7371d5')], 25)

-    @patch('swh.web.common.service.storage')
-    def test_lookup_revision_log_by(self, mock_storage):
-        # given
-        stub_revision_log = [self.SAMPLE_REVISION_RAW]
-        mock_storage.revision_log_by = MagicMock(
-            return_value=stub_revision_log)
+    @patch('swh.web.common.service.lookup_revision_log')
+    @patch('swh.web.common.service.lookup_snapshot')
+    @patch('swh.web.common.service.get_origin_visit')
+    def test_lookup_revision_log_by(self, mock_get_origin_visit,
+                                    mock_lookup_snapshot,
+                                    mock_lookup_revision_log):
+        # given
+        mock_get_origin_visit.return_value = {'snapshot': self.SHA1_SAMPLE}
+        mock_lookup_snapshot.return_value = \
+            {
+                'branches': {
+                    'refs/heads/master': {
+                        'target_type': 'revision',
+                        'target': self.SAMPLE_REVISION['id']
+                    }
+                }
+            }
+
+        mock_lookup_revision_log.return_value = [self.SAMPLE_REVISION]

        # when
        actual_log = service.lookup_revision_log_by(
            1, 'refs/heads/master', None, limit=100)

        # then
        self.assertEqual(list(actual_log), [self.SAMPLE_REVISION])

-        mock_storage.revision_log_by.assert_called_with(
-            1, 'refs/heads/master', None, limit=100)
-
-    @patch('swh.web.common.service.storage')
-    def test_lookup_revision_log_by_nolog(self, mock_storage):
+    @patch('swh.web.common.service.lookup_snapshot')
+    @patch('swh.web.common.service.get_origin_visit')
+    def test_lookup_revision_log_by_notfound(self, mock_get_origin_visit,
+                                             mock_lookup_snapshot):
        # given
-        mock_storage.revision_log_by = MagicMock(return_value=None)
+        mock_get_origin_visit.return_value = {'snapshot': self.SHA1_SAMPLE}
+        mock_lookup_snapshot.return_value = {'branches': {}}

        # when
-        res = service.lookup_revision_log_by(
-            1, 'refs/heads/master', None, limit=100)
-        # then
-        self.assertEqual(res, None)
-        mock_storage.revision_log_by.assert_called_with(
-            1, 'refs/heads/master', None, limit=100)
+        with self.assertRaises(NotFoundExc):
+            service.lookup_revision_log_by(
+                1, 'refs/heads/master', None, limit=100)
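# A minimal sketch (not part of this change): the reworked
# test_lookup_revision_log_by above mocks a three-step resolution in place
# of the removed storage.revision_log_by call. Taking the three patched
# helpers as plain callables (their real signatures are assumptions here),
# the flow the mocks describe is roughly:

def resolve_branch_log(get_visit, get_snapshot, get_log,
                       origin_id, branch_name, timestamp, limit=100):
    """Resolve branch_name through the visit snapshot, then walk its log."""
    visit = get_visit(origin_id, timestamp)        # -> {'snapshot': ...}
    snapshot = get_snapshot(visit['snapshot'])     # -> {'branches': {...}}
    branch = snapshot['branches'].get(branch_name)
    if branch is None or branch['target_type'] != 'revision':
        # the new test expects NotFoundExc here; LookupError is a stand-in
        raise LookupError('no revision for branch %s' % branch_name)
    return get_log(branch['target'], limit=limit)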
    @patch('swh.web.common.service.storage')
    def test_lookup_content_raw_not_found(self, mock_storage):
        # given
        mock_storage.content_find = MagicMock(return_value=None)

        # when
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_content_raw('sha1:' + self.SHA1_SAMPLE)
        self.assertIn('Content with %s checksum equals to %s not found!'
                      % ('sha1', self.SHA1_SAMPLE),
                      cm.exception.args[0])

        mock_storage.content_find.assert_called_with(
            {'sha1': hash_to_bytes(self.SHA1_SAMPLE)})

    @patch('swh.web.common.service.storage')
    def test_lookup_content_raw(self, mock_storage):
        # given
        mock_storage.content_find = MagicMock(return_value={
            'sha1': self.SHA1_SAMPLE,
        })
        mock_storage.content_get = MagicMock(return_value=[{
            'data': b'binary data'}])

        # when
        actual_content = service.lookup_content_raw(
            'sha256:%s' % self.SHA256_SAMPLE)

        # then
        self.assertEqual(actual_content, {'data': b'binary data'})

        mock_storage.content_find.assert_called_once_with(
            {'sha256': self.SHA256_SAMPLE_BIN})
        mock_storage.content_get.assert_called_once_with(
            [hash_to_bytes(self.SHA1_SAMPLE)])

    @patch('swh.web.common.service.storage')
    def test_lookup_content_not_found(self, mock_storage):
        # given
        mock_storage.content_find = MagicMock(return_value=None)

        # when
        with self.assertRaises(NotFoundExc) as cm:
            # then
            service.lookup_content('sha1:%s' % self.SHA1_SAMPLE)
        self.assertIn('Content with %s checksum equals to %s not found!'
                      % ('sha1', self.SHA1_SAMPLE),
                      cm.exception.args[0])

        mock_storage.content_find.assert_called_with(
            {'sha1': self.SHA1_SAMPLE_BIN})

    @patch('swh.web.common.service.storage')
    def test_lookup_content_with_sha1(self, mock_storage):
        # given
        mock_storage.content_find = MagicMock(
            return_value=self.SAMPLE_CONTENT_RAW)

        # when
        actual_content = service.lookup_content(
            'sha1:%s' % self.SHA1_SAMPLE)

        # then
        self.assertEqual(actual_content, self.SAMPLE_CONTENT)

        mock_storage.content_find.assert_called_with(
            {'sha1': hash_to_bytes(self.SHA1_SAMPLE)})

    @patch('swh.web.common.service.storage')
    def test_lookup_content_with_sha256(self, mock_storage):
        # given
        stub_content = self.SAMPLE_CONTENT_RAW
        stub_content['status'] = 'visible'

        expected_content = self.SAMPLE_CONTENT
        expected_content['status'] = 'visible'
        mock_storage.content_find = MagicMock(
            return_value=stub_content)

        # when
        actual_content = service.lookup_content(
            'sha256:%s' % self.SHA256_SAMPLE)

        # then
        self.assertEqual(actual_content, expected_content)

        mock_storage.content_find.assert_called_with(
            {'sha256': self.SHA256_SAMPLE_BIN})

    @patch('swh.web.common.service.storage')
    def test_lookup_person(self, mock_storage):
        # given
        mock_storage.person_get = MagicMock(return_value=[{
            'id': 'person_id',
            'name': b'some_name',
            'email': b'some-email',
        }])

        # when
        actual_person = service.lookup_person('person_id')

        # then
        self.assertEqual(actual_person, {
            'id': 'person_id',
            'name': 'some_name',
            'email': 'some-email',
        })

        mock_storage.person_get.assert_called_with(['person_id'])

    @patch('swh.web.common.service.storage')
    def test_lookup_directory_bad_checksum(self, mock_storage):
        # given
        mock_storage.directory_ls = MagicMock()

        # when
        with self.assertRaises(BadInputExc):
            service.lookup_directory('directory_id')

        # then
        self.assertFalse(mock_storage.directory_ls.called)

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory_not_found(self, mock_query, mock_storage):
        # given
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', 'directory-id-bin')
        mock_storage.directory_missing.return_value = ['directory-id-bin']

        # when
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_directory('directory_id')
        self.assertIn('Directory with sha1_git directory_id not found',
                      cm.exception.args[0])

        # then
        mock_query.parse_hash_with_algorithms_or_throws.assert_called_with(
            'directory_id', ['sha1'], 'Only sha1_git is supported.')

    @patch('swh.web.common.service.storage')
    @patch('swh.web.common.service.query')
    def test_lookup_directory(self, mock_query, mock_storage):
        mock_query.parse_hash_with_algorithms_or_throws.return_value = (
            'sha1', 'directory-sha1-bin')

        # given
        stub_dir_entries = [{
            'sha1': self.SHA1_SAMPLE_BIN,
            'sha256': self.SHA256_SAMPLE_BIN,
            'sha1_git': self.SHA1GIT_SAMPLE_BIN,
            'blake2s256': self.BLAKE2S256_SAMPLE_BIN,
            'target': hash_to_bytes(
                '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03'),
            'dir_id': self.DIRECTORY_ID_BIN,
            'name': b'bob',
            'type': 10,
        }]

        expected_dir_entries = [{
            'checksums': {
                'sha1': self.SHA1_SAMPLE,
                'sha256': self.SHA256_SAMPLE,
                'sha1_git': self.SHA1GIT_SAMPLE,
                'blake2s256': self.BLAKE2S256_SAMPLE
            },
            'target': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03',
            'dir_id': self.DIRECTORY_ID,
            'name': 'bob',
            'type': 10,
        }]

        mock_storage.directory_ls.return_value = stub_dir_entries
        mock_storage.directory_missing.return_value = []

        # when
        actual_directory_ls = list(service.lookup_directory(
            'directory-sha1'))

        # then
        self.assertEqual(actual_directory_ls, expected_dir_entries)

        mock_query.parse_hash_with_algorithms_or_throws.assert_called_with(
            'directory-sha1', ['sha1'], 'Only sha1_git is supported.')
        mock_storage.directory_ls.assert_called_with(
            'directory-sha1-bin')

    @patch('swh.web.common.service.storage')
    def test_lookup_directory_empty(self, mock_storage):
        empty_dir_sha1 = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
        mock_storage.directory_ls.return_value = []

        # when
        actual_directory_ls = list(service.lookup_directory(empty_dir_sha1))

        # then
        self.assertEqual(actual_directory_ls, [])

        self.assertFalse(mock_storage.directory_ls.called)

-    @patch('swh.web.common.service.storage')
-    def test_lookup_revision_by_nothing_found(self, mock_storage):
+    @patch('swh.web.common.service.lookup_snapshot')
+    @patch('swh.web.common.service.get_origin_visit')
+    def test_lookup_revision_by_nothing_found(self, mock_get_origin_visit,
+                                              mock_lookup_snapshot):
        # given
-        mock_storage.revision_get_by.return_value = None
+        mock_get_origin_visit.return_value = {'snapshot': self.SHA1_SAMPLE}
+        mock_lookup_snapshot.return_value = {'branches': {}}

        # when
        with self.assertRaises(NotFoundExc):
            service.lookup_revision_by(1)

-        # then
-        mock_storage.revision_get_by.assert_called_with(1, 'refs/heads/master',  # noqa
-                                                        limit=1,
-                                                        timestamp=None)
-
-    @patch('swh.web.common.service.storage')
-    def test_lookup_revision_by(self, mock_storage):
-        # given
-        stub_rev = self.SAMPLE_REVISION_RAW
-
-        expected_rev = self.SAMPLE_REVISION
-
-        mock_storage.revision_get_by.return_value = [stub_rev]
-
-        # when
-        actual_revision = service.lookup_revision_by(10, 'master2', 'some-ts')
-
-        # then
-        self.assertEqual(actual_revision, expected_rev)
-
-        mock_storage.revision_get_by.assert_called_with(10, 'master2',
-                                                        limit=1,
-                                                        timestamp='some-ts')
-
-    @patch('swh.web.common.service.storage')
-    def test_lookup_revision_by_nomerge(self, mock_storage):
+    @patch('swh.web.common.service.lookup_revision')
+    @patch('swh.web.common.service.lookup_snapshot')
+    @patch('swh.web.common.service.get_origin_visit')
+    def test_lookup_revision_by(self, mock_get_origin_visit,
+                                mock_lookup_snapshot, mock_lookup_revision):
        # given
-        stub_rev = self.SAMPLE_REVISION_RAW
-        stub_rev['parents'] = [
-            hash_to_bytes('adc83b19e793491b1c6ea0fd8b46cd9f32e592fc')]
-        expected_rev = self.SAMPLE_REVISION
-        expected_rev['parents'] = ['adc83b19e793491b1c6ea0fd8b46cd9f32e592fc']
-        mock_storage.revision_get_by.return_value = [stub_rev]
-
-        # when
-        actual_revision = service.lookup_revision_by(10, 'master2', 'some-ts')
-
-        # then
-        self.assertEqual(actual_revision, expected_rev)
-
-        mock_storage.revision_get_by.assert_called_with(10, 'master2',
-                                                        limit=1,
-                                                        timestamp='some-ts')
-
-    @patch('swh.web.common.service.storage')
-    def test_lookup_revision_by_merge(self, mock_storage):
-        # given
-        stub_rev = self.SAMPLE_REVISION_RAW
-        stub_rev['parents'] = [
-            hash_to_bytes('adc83b19e793491b1c6ea0fd8b46cd9f32e592fc'),
-            hash_to_bytes('ffff3b19e793491b1c6db0fd8b46cd9f32e592fc')
-        ]
-
-        expected_rev = self.SAMPLE_REVISION
-        expected_rev['parents'] = [
-            'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc',
-            'ffff3b19e793491b1c6db0fd8b46cd9f32e592fc'
-        ]
-        expected_rev['merge'] = True
+        expected_rev = self.SAMPLE_REVISION
+        mock_get_origin_visit.return_value = {'snapshot': self.SHA1_SAMPLE}
+        mock_lookup_snapshot.return_value = \
+            {
+                'branches': {
+                    'master2': {
+                        'target_type': 'revision',
+                        'target': expected_rev['id']
+                    }
+                }
+            }
-        mock_storage.revision_get_by.return_value = [stub_rev]
+        mock_lookup_revision.return_value = expected_rev

        # when
        actual_revision = service.lookup_revision_by(10, 'master2', 'some-ts')

        # then
        self.assertEqual(actual_revision, expected_rev)
-        mock_storage.revision_get_by.assert_called_with(10, 'master2',
-                                                        limit=1,
-                                                        timestamp='some-ts')

    @patch('swh.web.common.service.storage')
    def test_lookup_revision_with_context_by_ko(self, mock_storage):
        # given
        mock_storage.revision_get_by.return_value = None

        # when
        origin_id = 1
        branch_name = 'master3'
        ts = None
        with self.assertRaises(NotFoundExc) as cm:
            service.lookup_revision_with_context_by(origin_id, branch_name,
                                                    ts, 'sha1')
        # then
        self.assertIn(
            'Revision with (origin_id: %s, branch_name: %s'
            ', ts: %s) not found.' % (origin_id, branch_name, ts),
            cm.exception.args[0])

        mock_storage.revision_get_by.assert_called_once_with(
            origin_id, branch_name, limit=1, timestamp=ts)

    @patch('swh.web.common.service.lookup_revision_with_context')
    @patch('swh.web.common.service.storage')
    def test_lookup_revision_with_context_by(
            self, mock_storage, mock_lookup_revision_with_context):
        # given
        stub_root_rev = {'id': 'root-rev-id'}
        mock_storage.revision_get_by.return_value = [{'id': 'root-rev-id'}]

        stub_rev = {'id': 'rev-found'}
        mock_lookup_revision_with_context.return_value = stub_rev

        # when
        origin_id = 1
        branch_name = 'master3'
        ts = None
        sha1_git = 'sha1'
        actual_root_rev, actual_rev = service.lookup_revision_with_context_by(
            origin_id, branch_name, ts, sha1_git)

        # then
        self.assertEqual(actual_root_rev, stub_root_rev)
        self.assertEqual(actual_rev, stub_rev)

        mock_storage.revision_get_by.assert_called_once_with(
            origin_id, branch_name, limit=1, timestamp=ts)
        mock_lookup_revision_with_context.assert_called_once_with(
            stub_root_rev, sha1_git, 100)
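# A self-contained aside (not part of this change): the stacked @patch
# decorators used throughout these tests are applied bottom-up, so the
# innermost (lowest) patch is delivered as the first mock argument, which is
# why the argument order above looks reversed relative to the decorators.
# A quick demonstration with standard-library targets:

from unittest.mock import patch


@patch('os.getcwd')            # outermost -> last mock argument
@patch('os.getpid')            # innermost -> first mock argument
def _demo(mock_getpid, mock_getcwd):
    mock_getpid.return_value = 42
    mock_getcwd.return_value = '/tmp'
    import os
    assert os.getpid() == 42 and os.getcwd() == '/tmp'


_demo()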
    def test_lookup_revision_through_ko_not_implemented(self):
        # then
        with self.assertRaises(NotImplementedError):
            service.lookup_revision_through({
                'something-unknown': 10,
            })

    @patch('swh.web.common.service.lookup_revision_with_context_by')
    def test_lookup_revision_through_with_context_by(self, mock_lookup):
        # given
        stub_rev = {'id': 'rev'}
        mock_lookup.return_value = stub_rev

        # when
        actual_revision = service.lookup_revision_through({
            'origin_id': 1,
            'branch_name': 'master',
            'ts': None,
            'sha1_git': 'sha1-git'
        }, limit=1000)

        # then
        self.assertEqual(actual_revision, stub_rev)

        mock_lookup.assert_called_once_with(
            1, 'master', None, 'sha1-git', 1000)

    @patch('swh.web.common.service.lookup_revision_by')
    def test_lookup_revision_through_with_revision_by(self, mock_lookup):
        # given
        stub_rev = {'id': 'rev'}
        mock_lookup.return_value = stub_rev

        # when
        actual_revision = service.lookup_revision_through({
            'origin_id': 2,
            'branch_name': 'master2',
            'ts': 'some-ts',
        }, limit=10)

        # then
        self.assertEqual(actual_revision, stub_rev)

        mock_lookup.assert_called_once_with(
            2, 'master2', 'some-ts')

    @patch('swh.web.common.service.lookup_revision_with_context')
    def test_lookup_revision_through_with_context(self, mock_lookup):
        # given
        stub_rev = {'id': 'rev'}
        mock_lookup.return_value = stub_rev

        # when
        actual_revision = service.lookup_revision_through({
            'sha1_git_root': 'some-sha1-root',
            'sha1_git': 'some-sha1',
        })

        # then
        self.assertEqual(actual_revision, stub_rev)

        mock_lookup.assert_called_once_with(
            'some-sha1-root', 'some-sha1', 100)

    @patch('swh.web.common.service.lookup_revision')
    def test_lookup_revision_through_with_revision(self, mock_lookup):
        # given
        stub_rev = {'id': 'rev'}
        mock_lookup.return_value = stub_rev

        # when
        actual_revision = service.lookup_revision_through({
            'sha1_git': 'some-sha1',
        })

        # then
        self.assertEqual(actual_revision, stub_rev)

        mock_lookup.assert_called_once_with(
            'some-sha1')

    @patch('swh.web.common.service.lookup_revision_through')
    def test_lookup_directory_through_revision_ko_not_found(
            self, mock_lookup_rev):
        # given
        mock_lookup_rev.return_value = None

        # when
        with self.assertRaises(NotFoundExc):
            service.lookup_directory_through_revision(
                {'id': 'rev'}, 'some/path', 100)

        mock_lookup_rev.assert_called_once_with({'id': 'rev'}, 100)

    @patch('swh.web.common.service.lookup_revision_through')
    @patch('swh.web.common.service.lookup_directory_with_revision')
    def test_lookup_directory_through_revision_ok_with_data(
            self, mock_lookup_dir, mock_lookup_rev):
        # given
        mock_lookup_rev.return_value = {'id': 'rev-id'}
        mock_lookup_dir.return_value = {'type': 'dir',
                                        'content': []}

        # when
        rev_id, dir_result = service.lookup_directory_through_revision(
            {'id': 'rev'}, 'some/path', 100)

        # then
        self.assertEqual(rev_id, 'rev-id')
        self.assertEqual(dir_result, {'type': 'dir',
                                      'content': []})

        mock_lookup_rev.assert_called_once_with({'id': 'rev'}, 100)
        mock_lookup_dir.assert_called_once_with('rev-id', 'some/path', False)

    @patch('swh.web.common.service.lookup_revision_through')
    @patch('swh.web.common.service.lookup_directory_with_revision')
    def test_lookup_directory_through_revision_ok_with_content(
            self, mock_lookup_dir, mock_lookup_rev):
        # given
        mock_lookup_rev.return_value = {'id': 'rev-id'}
        stub_result = {'type': 'file',
                       'revision': 'rev-id',
                       'content': {'data': b'blah',
                                   'sha1': 'sha1'}}
        mock_lookup_dir.return_value = stub_result

        # when
        rev_id, dir_result = service.lookup_directory_through_revision(
            {'id': 'rev'}, 'some/path', 10, with_data=True)

        # then
        self.assertEqual(rev_id, 'rev-id')
        self.assertEqual(dir_result, stub_result)

        mock_lookup_rev.assert_called_once_with({'id': 'rev'}, 10)
        mock_lookup_dir.assert_called_once_with('rev-id', 'some/path', True)

diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py
index 64e4f0df..62564ae2 100644
--- a/swh/web/tests/common/test_utils.py
+++ b/swh/web/tests/common/test_utils.py
@@ -1,150 +1,117 @@
# Copyright (C) 2017-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import datetime

-from unittest.mock import patch
-
from swh.web.common import utils
from swh.web.common.exc import BadInputExc
from swh.web.tests.testcase import WebTestCase


class UtilsTestCase(WebTestCase):
    def test_shorten_path_noop(self):
        noops = [
            '/api/',
            '/browse/',
            '/content/symbol/foobar/'
        ]

        for noop in noops:
            self.assertEqual(
                utils.shorten_path(noop),
                noop
            )

    def test_shorten_path_sha1(self):
        sha1 = 'aafb16d69fd30ff58afdd69036a26047f3aebdc6'
        short_sha1 = sha1[:8] + '...'

        templates = [
            '/api/1/content/sha1:%s/',
            '/api/1/content/sha1_git:%s/',
            '/api/1/directory/%s/',
            '/api/1/content/sha1:%s/ctags/',
        ]

        for template in templates:
            self.assertEqual(
                utils.shorten_path(template % sha1),
                template % short_sha1
            )

    def test_shorten_path_sha256(self):
        sha256 = ('aafb16d69fd30ff58afdd69036a26047'
                  '213add102934013a014dfca031c41aef')
        short_sha256 = sha256[:8] + '...'

        templates = [
            '/api/1/content/sha256:%s/',
            '/api/1/directory/%s/',
            '/api/1/content/sha256:%s/filetype/',
        ]

        for template in templates:
            self.assertEqual(
                utils.shorten_path(template % sha256),
                template % short_sha256
            )

    def test_parse_timestamp(self):
        input_timestamps = [
            None,
            '2016-01-12',
            '2016-01-12T09:19:12+0100',
            'Today is January 1, 2047 at 8:21:00AM',
            '1452591542',
        ]

        output_dates = [
            None,
            datetime.datetime(2016, 1, 12, 0, 0),
            datetime.datetime(2016, 1, 12, 8, 19, 12,
                              tzinfo=datetime.timezone.utc),
            datetime.datetime(2047, 1, 1, 8, 21),
            datetime.datetime(2016, 1, 12, 9, 39, 2,
                              tzinfo=datetime.timezone.utc),
        ]

        for ts, exp_date in zip(input_timestamps, output_dates):
            self.assertEqual(utils.parse_timestamp(ts), exp_date)

    def test_format_utc_iso_date(self):
        self.assertEqual(utils.format_utc_iso_date('2017-05-04T13:27:13+02:00'),  # noqa
                         '04 May 2017, 11:27 UTC')

    def test_gen_path_info(self):
        input_path = '/home/user/swh-environment/swh-web/'
        expected_result = [
            {'name': 'home', 'path': 'home'},
            {'name': 'user', 'path': 'home/user'},
            {'name': 'swh-environment', 'path': 'home/user/swh-environment'},
            {'name': 'swh-web', 'path': 'home/user/swh-environment/swh-web'}
        ]
        path_info = utils.gen_path_info(input_path)
        self.assertEqual(path_info, expected_result)

        input_path = 'home/user/swh-environment/swh-web'
        path_info = utils.gen_path_info(input_path)
        self.assertEqual(path_info, expected_result)

-    @patch('swh.web.common.utils.service')
-    def test_get_origin_visits(self, mock_service):
-        mock_service.MAX_LIMIT = 2
-
-        def _lookup_origin_visits(*args, **kwargs):
-            if kwargs['last_visit'] is None:
-                return [{'visit': 1,
-                         'date': '2017-05-06T00:59:10+00:00',
-                         'metadata': {}},
-                        {'visit': 2,
-                         'date': '2017-08-06T00:59:10+00:00',
-                         'metadata': {}}
-                        ]
-            else:
-                return [{'visit': 3,
-                         'date': '2017-09-06T00:59:10+00:00',
-                         'metadata': {}}
-                        ]
-
-        mock_service.lookup_origin_visits.side_effect = _lookup_origin_visits
-
-        origin_info = {
-            'id': 1,
-            'type': 'git',
-            'url': 'https://github.com/foo/bar',
-        }
-
-        origin_visits = utils.get_origin_visits(origin_info)
-
-        self.assertEqual(len(origin_visits), 3)
-
-    def test_get_swh_persisent_id(self):
+    def test_get_swh_persistent_id(self):
        swh_object_type = 'content'
        sha1_git = 'aafb16d69fd30ff58afdd69036a26047f3aebdc6'

        expected_swh_id = 'swh:1:cnt:' + sha1_git

        self.assertEqual(utils.get_swh_persistent_id(swh_object_type, sha1_git),  # noqa
                         expected_swh_id)

        with self.assertRaises(BadInputExc) as cm:
            utils.get_swh_persistent_id('foo', sha1_git)
        self.assertIn('Invalid object', cm.exception.args[0])

        with self.assertRaises(BadInputExc) as cm:
            utils.get_swh_persistent_id(swh_object_type, 'not a valid id')
        self.assertIn('Invalid object', cm.exception.args[0])
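# A minimal sketch (not part of this change): test_get_swh_persistent_id
# above expects identifiers of the form 'swh:1:<type-code>:<sha1_git>'. A
# builder consistent with the 'content' -> 'cnt' case it checks (the other
# type codes and the validation rules here are assumptions):

import re

_TYPE_CODES = {'content': 'cnt', 'directory': 'dir', 'release': 'rel',
               'revision': 'rev', 'snapshot': 'snp'}


def build_swh_persistent_id(object_type, object_id):
    """Build a 'swh:1:...' identifier, mirroring the checks tested above."""
    if object_type not in _TYPE_CODES:
        raise ValueError('Invalid object type: %s' % object_type)
    if not re.fullmatch(r'[0-9a-f]{40}', object_id):
        raise ValueError('Invalid object id: %s' % object_id)
    return 'swh:1:%s:%s' % (_TYPE_CODES[object_type], object_id)


# build_swh_persistent_id('content', 'aafb16d69fd30ff58afdd69036a26047f3aebdc6')
# -> 'swh:1:cnt:aafb16d69fd30ff58afdd69036a26047f3aebdc6'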