# swh/web/api/views/origin.py
#
# Copyright (C) 2015-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from distutils.util import strtobool

from swh.web.common import service
from swh.web.common.exc import BadInputExc
from swh.web.common.utils import (
    reverse, get_origin_visits
)
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup


def _enrich_origin(origin):
    """Return ``origin`` extended with the url listing its visits.

    Args:
        origin (dict): an origin as returned by the lookup service

    Returns:
        dict: a copy of ``origin`` with an extra ``origin_visits_url`` key
        when ``origin`` has an ``id``; otherwise ``origin`` itself, untouched.
    """
    if 'id' not in origin:
        return origin
    enriched = origin.copy()
    enriched['origin_visits_url'] = reverse(
        'api-origin-visits', url_args={'origin_id': origin['id']})
    return enriched


# The group names below must match the keyword arguments of the view: the
# captured url parts are handed over to it by name.
@api_route(r'/origin/(?P<origin_id>[0-9]+)/', 'api-origin')
@api_route(r'/origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)/',
           'api-origin')
@api_doc('/origin/')
def api_origin(request, origin_id=None, origin_type=None, origin_url=None):
    """
    .. http:get:: /api/1/origin/(origin_id)/

        Get information about a software origin.

        :param int origin_id: a software origin identifier

        :>json number id: the origin unique identifier
        :>json string origin_visits_url: link to :http:get:`/api/1/origin/(origin_id)/visits/`
            in order to get information about the visits for that origin
        :>json string type: the type of software origin (possible values are ``git``, ``svn``,
            ``hg``, ``deb``, ``pypi``, ``ftp`` or ``deposit``)
        :>json string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1/`

    .. http:get:: /api/1/origin/(origin_type)/url/(origin_url)/

        Get information about a software origin.

        :param string origin_type: the origin type (possible values are ``git``, ``svn``,
            ``hg``, ``deb``, ``pypi``, ``ftp`` or ``deposit``)
        :param string origin_url: the origin url

        :>json number id: the origin unique identifier
        :>json string origin_visits_url: link to :http:get:`/api/1/origin/(origin_id)/visits/`
            in order to get information about the visits for that origin
        :>json string type: the type of software origin
        :>json string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/git/url/https://github.com/python/cpython/`
    """  # noqa
    # Only one of the two routes matched, so keep just the criteria that were
    # actually captured from the url: either {'id'} or {'type', 'url'}.
    criteria = {
        'id': origin_id,
        'type': origin_type,
        'url': origin_url
    }
    ori_dict = {k: v for k, v in criteria.items() if v}

    if 'id' in ori_dict:
        error_msg = 'Origin with id %s not found.' % ori_dict['id']
    else:
        error_msg = 'Origin with type %s and URL %s not found' % (
            ori_dict['type'], ori_dict['url'])

    # error_msg is the reason reported with the 404 response when the lookup
    # finds nothing.
    return api_lookup(
        service.lookup_origin, ori_dict,
        notfound_msg=error_msg,
        enrich_fn=_enrich_origin)
@api_route(r'/origin/search/(?P<url_pattern>.+)/', 'api-origin-search')
@api_doc('/origin/search/')
def api_origin_search(request, url_pattern):
    """
    .. http:get:: /api/1/origin/search/(url_pattern)/

        Search for software origins whose urls contain a provided string
        pattern or match a provided regular expression.
        The search is performed in a case insensitive way.

        :param string url_pattern: a string pattern or a regular expression
        :query int offset: the number of found origins to skip before returning results
        :query int limit: the maximum number of found origins to return
        :query boolean regexp: if true, consider provided pattern as a regular expression
            and search origins whose urls match it
        :query boolean with_visit: if true, only return origins with at least one visit
            by Software Heritage

        :>jsonarr number id: the origin unique identifier
        :>jsonarr string origin_visits_url: link to :http:get:`/api/1/origin/(origin_id)/visits/`
            in order to get information about the visits for that origin
        :>jsonarr string type: the type of software origin
        :>jsonarr string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request
        :resheader Link: indicates that a subsequent result page is available and contains
            the url pointing to it

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/search/python/?limit=2`
    """  # noqa
    result = {}
    offset = int(request.query_params.get('offset', '0'))
    limit = int(request.query_params.get('limit', '70'))
    # The two flags are kept as the raw strings received: they are converted
    # for the search itself and forwarded verbatim in the next-page link.
    regexp = request.query_params.get('regexp', 'false')
    with_visit = request.query_params.get('with_visit', 'false')

    results = api_lookup(service.search_origin, url_pattern, offset, limit,
                         bool(strtobool(regexp)), bool(strtobool(with_visit)),
                         enrich_fn=_enrich_origin)

    # A full page suggests there may be more results: advertise the next one.
    if len(results) == limit:
        query_params = {
            'offset': offset + limit,
            'limit': limit,
            'regexp': regexp,
            # Carry the visit filter over too, otherwise the next page would
            # be computed over a different (unfiltered) set of origins.
            'with_visit': with_visit,
        }
        result['headers'] = {
            'link-next': reverse('api-origin-search',
                                 url_args={'url_pattern': url_pattern},
                                 query_params=query_params)
        }

    result.update({
        'results': results
    })

    return result
@api_route(r'/origin/metadata-search/', 'api-origin-metadata-search')
@api_doc('/origin/metadata-search/', noargs=True)
def api_origin_metadata_search(request):
    """
    .. http:get:: /api/1/origin/metadata-search/

        Search for software origins whose metadata (expressed as a
        JSON-LD/CodeMeta dictionary) match the provided criteria.
        For now, only full-text search on this dictionary is supported.

        :query str fulltext: a string that will be matched against origin metadata;
            results are ranked and ordered starting with the best ones.
        :query int limit: the maximum number of found origins to return
            (bounded to 100)

        :>jsonarr number origin_id: the origin unique identifier
        :>jsonarr dict metadata: metadata of the origin (as a JSON-LD/CodeMeta dictionary)
        :>jsonarr string from_revision: the revision used to extract these
            metadata (the current HEAD or one of the former HEADs)
        :>jsonarr dict tool: the tool used to extract these metadata

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`
    """  # noqa
    params = request.query_params
    query = params.get('fulltext', None)

    # The limit is parsed before the query is validated, so a non-numeric
    # limit is reported even when "fulltext" is missing as well.
    requested = int(params.get('limit', '70'))
    # Never ask the backend for more than 100 origins at once.
    max_results = requested if requested < 100 else 100

    if not query:
        raise BadInputExc('"fulltext" must be provided and non-empty.')

    found = api_lookup(service.search_origin_metadata, query, max_results)
    return {'results': found}
@api_route(r'/origin/(?P<origin_id>[0-9]+)/visits/', 'api-origin-visits')
@api_doc('/origin/visits/')
def api_origin_visits(request, origin_id):
    """
    .. http:get:: /api/1/origin/(origin_id)/visits/

        Get information about all visits of a software origin.
        Visits are returned sorted in descending order according
        to their date.

        :param int origin_id: a software origin identifier
        :query int per_page: specify the number of visits to list, for pagination purposes
        :query int last_visit: visit to start listing from, for pagination purposes

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request
        :resheader Link: indicates that a subsequent result page is available and contains
            the url pointing to it

        :>jsonarr string date: ISO representation of the visit date (in UTC)
        :>jsonarr number id: the unique identifier of the origin
        :>jsonarr string origin_visit_url: link to :http:get:`/api/1/origin/(origin_id)/visit/(visit_id)/`
            in order to get information about the visit
        :>jsonarr string snapshot: the snapshot identifier of the visit
        :>jsonarr string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/`
            in order to get information about the snapshot of the visit
        :>jsonarr string status: status of the visit (either **full**, **partial** or **ongoing**)
        :>jsonarr number visit: the unique identifier of the visit

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1/visits/`
    """  # noqa
    result = {}
    per_page = int(request.query_params.get('per_page', '10'))
    last_visit = request.query_params.get('last_visit')
    if last_visit:
        last_visit = int(last_visit)

    def _lookup_origin_visits(
            origin_id, last_visit=last_visit, per_page=per_page):
        """Yield one page of visits, in the reverse of the helper's order."""
        # Reverse a copy rather than the returned list itself: that list
        # belongs to get_origin_visits and may be reused by it
        # (NOTE(review): confirm whether the helper caches its result).
        all_visits = list(reversed(get_origin_visits({'id': origin_id})))
        if not last_visit:
            # first page
            visits = all_visits[:per_page]
        else:
            # the page starts right after the visit the previous page ended
            # on; an unknown last_visit yields an empty page
            visits = []
            for i, v in enumerate(all_visits):
                if v['visit'] == last_visit:
                    visits = all_visits[i+1:i+1+per_page]
                    break
        yield from visits

    def _enrich_origin_visit(origin_visit):
        """Return a copy of the visit with links to itself and its snapshot."""
        ov = origin_visit.copy()
        ov['origin_visit_url'] = reverse('api-origin-visit',
                                         url_args={'origin_id': origin_id,
                                                   'visit_id': ov['visit']})
        snapshot = ov['snapshot']
        if snapshot:
            ov['snapshot_url'] = reverse('api-snapshot',
                                         url_args={'snapshot_id': snapshot})
        else:
            ov['snapshot_url'] = None
        return ov

    results = api_lookup(_lookup_origin_visits, origin_id,
                         notfound_msg='No origin {} found'.format(origin_id),
                         enrich_fn=_enrich_origin_visit)

    # A full page suggests there may be older visits: advertise the next page,
    # starting after the last visit returned here.
    if results and len(results) == per_page:
        query_params = {'last_visit': results[-1]['visit']}

        # only repeat per_page when the client explicitly asked for one
        if request.query_params.get('per_page'):
            query_params['per_page'] = per_page

        result['headers'] = {
            'link-next': reverse('api-origin-visits',
                                 url_args={'origin_id': origin_id},
                                 query_params=query_params)
        }

    result.update({
        'results': results
    })

    return result
# Group names must match the view's keyword arguments.
@api_route(r'/origin/(?P<origin_id>[0-9]+)/visit/(?P<visit_id>[0-9]+)/',
           'api-origin-visit')
@api_doc('/origin/visit/')
def api_origin_visit(request, origin_id, visit_id):
    """
    .. http:get:: /api/1/origin/(origin_id)/visit/(visit_id)/

        Get information about a specific visit of a software origin.

        :param int origin_id: a software origin identifier
        :param int visit_id: a visit identifier

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        :>json string date: ISO representation of the visit date (in UTC)
        :>json number origin: the origin unique identifier
        :>json string origin_url: link to get information about the origin
        :>json string snapshot: the snapshot identifier of the visit
        :>json string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/`
            in order to get information about the snapshot of the visit
        :>json string status: status of the visit (either **full**, **partial** or **ongoing**)
        :>json number visit: the unique identifier of the visit

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin or visit can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1500/visit/1/`
    """  # noqa
    def _enrich_origin_visit(origin_visit):
        """Return a copy of the visit with links to its origin and snapshot."""
        ov = origin_visit.copy()
        ov['origin_url'] = reverse('api-origin',
                                   url_args={'origin_id': ov['origin']})
        snapshot = ov['snapshot']
        if snapshot:
            ov['snapshot_url'] = reverse('api-snapshot',
                                         url_args={'snapshot_id': snapshot})
        else:
            # keep the key present even when the visit has no snapshot
            ov['snapshot_url'] = None
        return ov

    # The identifiers are forwarded exactly as captured from the url (strings).
    return api_lookup(
        service.lookup_origin_visit, origin_id, visit_id,
        notfound_msg=('No visit {} for origin {} found'
                      .format(visit_id, origin_id)),
        enrich_fn=_enrich_origin_visit)
# swh/web/tests/api/views/test_origin.py
#
# Copyright (C) 2015-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from rest_framework.test import APITestCase
from unittest.mock import patch

from swh.storage.exc import StorageDBError, StorageAPIError

from swh.web.tests.testcase import SWHWebTestCase


class OriginApiTestCase(SWHWebTestCase, APITestCase):
    """Tests of the ``/api/1/origin/...`` endpoints against mocked backends."""

    def setUp(self):
        self.origin_visit1 = dict(
            date=1104616800.0,
            origin=10,
            visit=100,
            metadata=None,
            status='full',
        )
        self.origin1 = dict(
            id=1234,
            url='ftp://some/url/to/origin/0',
            type='ftp',
        )

    @staticmethod
    def _stub_metadata_search_results():
        """Build the single-result answer served by the mocked indexer."""
        return [{
            'from_revision':
            b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
            'metadata': {'author': 'Jane Doe'},
            'origin_id': 54974445,
            'tool': {
                'configuration': {
                    'context': ['NpmMapping', 'CodemetaMapping'],
                    'type': 'local'
                },
                'id': 3,
                'name': 'swh-metadata-detector',
                'version': '0.0.1'
            }
        }]

    @patch('swh.web.api.views.origin.get_origin_visits')
    def test_api_1_lookup_origin_visits_raise_error(
        self, mock_get_origin_visits,
    ):
        # given
        message = 'voluntary error to check the bad request middleware.'
        mock_get_origin_visits.side_effect = ValueError(message)

        # when
        resp = self.client.get('/api/1/origin/2/visits/')

        # then
        self.assertEqual(resp.status_code, 400)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, {
            'exception': 'ValueError',
            'reason': 'voluntary error to check the bad request middleware.'
        })

    @patch('swh.web.common.utils.service')
    def test_api_1_lookup_origin_visits_raise_swh_storage_error_db(
            self, mock_service):
        # given
        mock_service.lookup_origin_visits.side_effect = StorageDBError(
            'Storage exploded! Will be back online shortly!')

        # when
        resp = self.client.get('/api/1/origin/2/visits/')

        # then
        self.assertEqual(resp.status_code, 503)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, {
            'exception': 'StorageDBError',
            'reason':
            'An unexpected error occurred in the backend: '
            'Storage exploded! Will be back online shortly!'
        })

    @patch('swh.web.common.utils.service')
    def test_api_1_lookup_origin_visits_raise_swh_storage_error_api(
            self, mock_service):
        # given
        mock_service.lookup_origin_visits.side_effect = StorageAPIError(
            'Storage API dropped dead! Will resurrect from its ashes asap!')

        # when
        resp = self.client.get('/api/1/origin/2/visits/')

        # then
        self.assertEqual(resp.status_code, 503)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, {
            'exception': 'StorageAPIError',
            'reason':
            'An unexpected error occurred in the api backend: '
            'Storage API dropped dead! Will resurrect from its ashes asap!'
        })

    @patch('swh.web.api.views.origin.get_origin_visits')
    def test_api_1_lookup_origin_visits(self, mock_get_origin_visits):
        # given: four visits, oldest first
        mock_get_origin_visits.return_value = [
            {'date': 1293919200.0, 'origin': 2, 'snapshot': '1234',
             'visit': 1},
            {'date': 1293919200.0, 'origin': 2, 'snapshot': '1234',
             'visit': 2},
            {'date': 1420149600.0, 'origin': 2, 'snapshot': '5678',
             'visit': 3},
            {'date': 1420149600.0, 'origin': 2, 'snapshot': '5678',
             'visit': 4},
        ]

        # when
        resp = self.client.get(
            '/api/1/origin/2/visits/?per_page=2&last_visit=3')

        # then: the page holds the two visits following visit 3, newest first
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, [
            {
                'date': 1293919200.0,
                'origin': 2,
                'snapshot': '1234',
                'visit': 2,
                'origin_visit_url': '/api/1/origin/2/visit/2/',
                'snapshot_url': '/api/1/snapshot/1234/'
            },
            {
                'date': 1293919200.0,
                'origin': 2,
                'snapshot': '1234',
                'visit': 1,
                'origin_visit_url': '/api/1/origin/2/visit/1/',
                'snapshot_url': '/api/1/snapshot/1234/'
            },
        ])

    @patch('swh.web.api.views.origin.service')
    def test_api_1_lookup_origin_visit(self, mock_service):
        # given
        mock_service.lookup_origin_visit.return_value = dict(
            self.origin_visit1, snapshot='57478754')

        expected_origin_visit = dict(
            self.origin_visit1,
            origin_url='/api/1/origin/10/',
            snapshot='57478754',
            snapshot_url='/api/1/snapshot/57478754/')

        # when
        resp = self.client.get('/api/1/origin/10/visit/100/')

        # then
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, expected_origin_visit)

        mock_service.lookup_origin_visit.assert_called_once_with('10', '100')

    @patch('swh.web.api.views.origin.service')
    def test_api_1_lookup_origin_visit_not_found(self, mock_service):
        # given
        mock_service.lookup_origin_visit.return_value = None

        # when
        resp = self.client.get('/api/1/origin/1/visit/1000/')

        # then
        self.assertEqual(resp.status_code, 404)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, {
            'exception': 'NotFoundExc',
            'reason': 'No visit 1000 for origin 1 found'
        })

        mock_service.lookup_origin_visit.assert_called_once_with('1', '1000')

    @patch('swh.web.api.views.origin.service')
    def test_api_origin_by_id(self, mock_service):
        # given
        mock_service.lookup_origin.return_value = self.origin1

        expected_origin = dict(
            self.origin1, origin_visits_url='/api/1/origin/1234/visits/')

        # when
        resp = self.client.get('/api/1/origin/1234/')

        # then
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, expected_origin)

        mock_service.lookup_origin.assert_called_with({'id': '1234'})

    @patch('swh.web.api.views.origin.service')
    def test_api_origin_by_type_url(self, mock_service):
        # given
        stub_origin = dict(self.origin1, id=987)
        mock_service.lookup_origin.return_value = stub_origin

        expected_origin = dict(
            stub_origin, origin_visits_url='/api/1/origin/987/visits/')

        # when
        resp = self.client.get('/api/1/origin/ftp/url'
                               '/ftp://some/url/to/origin/0/')

        # then
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, expected_origin)

        mock_service.lookup_origin.assert_called_with(
            {'url': 'ftp://some/url/to/origin/0', 'type': 'ftp'})

    @patch('swh.web.api.views.origin.service')
    def test_api_origin_not_found(self, mock_service):
        # given
        mock_service.lookup_origin.return_value = None

        # when
        resp = self.client.get('/api/1/origin/4321/')

        # then
        self.assertEqual(resp.status_code, 404)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, {
            'exception': 'NotFoundExc',
            'reason': 'Origin with id 4321 not found.'
        })

        mock_service.lookup_origin.assert_called_with({'id': '4321'})

    @patch('swh.web.common.service.idx_storage')
    def test_api_origin_metadata_search(self, mock_idx_storage):
        # given
        search_mock = \
            mock_idx_storage.origin_intrinsic_metadata_search_fulltext
        search_mock.return_value = self._stub_metadata_search_results()

        # when
        resp = self.client.get(
            '/api/1/origin/metadata-search/?fulltext=Jane%20Doe')

        # then: the revision comes back hex-encoded, the rest is untouched
        self.assertEqual(resp.status_code, 200, resp.content)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, [{
            'origin_id': 54974445,
            'metadata': {'author': 'Jane Doe'},
            'from_revision': '7026b7c1a2af56521e951c01ed20f255fa054238',
            'tool': {
                'configuration': {
                    'context': ['NpmMapping', 'CodemetaMapping'],
                    'type': 'local'
                },
                'id': 3,
                'name': 'swh-metadata-detector',
                'version': '0.0.1',
            }
        }])

        search_mock.assert_called_with(conjunction=['Jane Doe'], limit=70)

    @patch('swh.web.common.service.idx_storage')
    def test_api_origin_metadata_search_limit(self, mock_idx_storage):
        # given
        search_mock = \
            mock_idx_storage.origin_intrinsic_metadata_search_fulltext
        search_mock.return_value = self._stub_metadata_search_results()

        # default limit, explicit limit, then a limit above the bound of 100
        scenarios = (
            ('', 70),
            ('&limit=10', 10),
            ('&limit=987', 100),
        )
        for extra_query, expected_limit in scenarios:
            # when
            resp = self.client.get(
                '/api/1/origin/metadata-search/?fulltext=Jane%20Doe'
                + extra_query)

            # then
            self.assertEqual(resp.status_code, 200, resp.content)
            self.assertEqual(resp['Content-Type'], 'application/json')
            self.assertEqual(len(resp.data), 1)
            search_mock.assert_called_with(
                conjunction=['Jane Doe'], limit=expected_limit)

    @patch('swh.web.common.service.idx_storage')
    def test_api_origin_metadata_search_invalid(self, mock_idx_storage):
        # when: the mandatory "fulltext" parameter is missing
        resp = self.client.get('/api/1/origin/metadata-search/')

        # then
        self.assertEqual(resp.status_code, 400, resp.content)
        mock_idx_storage.assert_not_called()