Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
index 2f32c2db8..71c179e4b 100644
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -1,382 +1,383 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from distutils.util import strtobool
from swh.web.common import service
from swh.web.common.exc import BadInputExc
from swh.web.common.utils import (
reverse, get_origin_visits
)
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
def _enrich_origin(origin):
if 'id' in origin:
o = origin.copy()
o['origin_visits_url'] = \
reverse('api-origin-visits', url_args={'origin_id': origin['id']})
return o
return origin
@api_route(r'/origin/(?P<origin_id>[0-9]+)/', 'api-origin')
@api_route(r'/origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)/',
           'api-origin')
@api_doc('/origin/')
def api_origin(request, origin_id=None, origin_type=None, origin_url=None):
    """
    .. http:get:: /api/1/origin/(origin_id)/

        Get information about a software origin.

        :param int origin_id: a software origin identifier

        :>json number id: the origin unique identifier
        :>json string origin_visits_url: link to in order to get information about the
            visits for that origin
        :>json string type: the type of software origin (possible values are ``git``, ``svn``,
            ``hg``, ``deb``, ``pypi``, ``ftp`` or ``deposit``)
        :>json string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1/`

    .. http:get:: /api/1/origin/(origin_type)/url/(origin_url)/

        Get information about a software origin.

        :param string origin_type: the origin type (possible values are ``git``, ``svn``,
            ``hg``, ``deb``, ``pypi``, ``ftp`` or ``deposit``)
        :param string origin_url: the origin url

        :>json number id: the origin unique identifier
        :>json string origin_visits_url: link to in order to get information about the
            visits for that origin
        :>json string type: the type of software origin
        :>json string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/git/url/https://github.com/python/cpython/`
    """ # noqa
    # Only the criteria actually captured by the matched route are kept:
    # either the identifier alone, or the (type, url) pair.
    ori_dict = {}
    for key, value in (('id', origin_id),
                       ('type', origin_type),
                       ('url', origin_url)):
        if value:
            ori_dict[key] = value

    if 'id' not in ori_dict:
        error_msg = 'Origin with type %s and URL %s not found' % (
            ori_dict['type'], ori_dict['url'])
    else:
        error_msg = 'Origin with id %s not found.' % ori_dict['id']

    return api_lookup(service.lookup_origin, ori_dict,
                      notfound_msg=error_msg,
                      enrich_fn=_enrich_origin)
@api_route(r'/origin/search/(?P<url_pattern>.+)/',
           'api-origin-search')
@api_doc('/origin/search/')
def api_origin_search(request, url_pattern):
    """
    .. http:get:: /api/1/origin/search/(url_pattern)/

        Search for software origins whose urls contain a provided string
        pattern or match a provided regular expression.
        The search is performed in a case insensitive way.

        :param string url_pattern: a string pattern or a regular expression
        :query int offset: the number of found origins to skip before returning results
        :query int limit: the maximum number of found origins to return
        :query boolean regexp: if true, consider provided pattern as a regular expression
            and search origins whose urls match it
        :query boolean with_visit: if true, only return origins with at least one visit
            by Software Heritage

        :>jsonarr number id: the origin unique identifier
        :>jsonarr string origin_visits_url: link to get information about the
            visits for that origin
        :>jsonarr string type: the type of software origin
        :>jsonarr string url: the origin canonical url

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/search/python/?limit=2`
    """ # noqa
    params = request.query_params
    offset = int(params.get('offset', '0'))
    limit = int(params.get('limit', '70'))
    # Kept as raw strings so they can be echoed unchanged in the next-page
    # link; they are converted to booleans only for the lookup itself.
    regexp = params.get('regexp', 'false')
    with_visit = params.get('with_visit', 'false')

    results = api_lookup(service.search_origin, url_pattern, offset, limit,
                         bool(strtobool(regexp)), bool(strtobool(with_visit)),
                         enrich_fn=_enrich_origin)

    result = {}
    # A full page suggests more matches may follow: advertise the next page.
    if len(results) == limit:
        next_params = {
            'offset': offset + limit,
            'limit': limit,
            'regexp': regexp,
        }
        # Carry the visit filter over as well, otherwise following the link
        # would silently switch to an unfiltered search. It is only added
        # when the client supplied it, so other links stay unchanged.
        if 'with_visit' in params:
            next_params['with_visit'] = with_visit
        result['headers'] = {
            'link-next': reverse('api-origin-search',
                                 url_args={'url_pattern': url_pattern},
                                 query_params=next_params)
        }

    result['results'] = results
    return result
@api_route(r'/origin/metadata-search/',
           'api-origin-metadata-search')
@api_doc('/origin/metadata-search/', noargs=True)
def api_origin_metadata_search(request):
    """
    .. http:get:: /api/1/origin/metadata-search/

        Search for software origins whose metadata (expressed as a
        JSON-LD/CodeMeta dictionary) match the provided criteria.
        For now, only full-text search on this dictionary is supported.

        :query str fulltext: a string that will be matched against origin metadata;
            results are ranked and ordered starting with the best ones.
        :query int limit: the maximum number of found origins to return
            (bounded to 100, must not be negative)

        :>jsonarr number origin_id: the origin unique identifier
        :>jsonarr dict metadata: metadata of the origin (as a JSON-LD/CodeMeta dictionary)
        :>jsonarr string from_revision: the revision used to extract these
            metadata (the current HEAD or one of the former HEADs)
        :>jsonarr dict tool: the tool used to extract these metadata

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 400: ``fulltext`` is missing or empty, or ``limit`` is negative

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`
    """ # noqa
    fulltext = request.query_params.get('fulltext', None)
    # Cap the page size so a single request cannot ask for an unbounded
    # number of results.
    limit = min(int(request.query_params.get('limit', '70')), 100)

    if not fulltext:
        content = '"fulltext" must be provided and non-empty.'
        raise BadInputExc(content)

    # A negative limit is never meaningful; report it as bad input instead
    # of forwarding it to the search backend.
    if limit < 0:
        raise BadInputExc('"limit" must be a non-negative integer.')

    results = api_lookup(service.search_origin_metadata, fulltext, limit)

    return {
        'results': results,
    }
@api_route(r'/origin/(?P<origin_id>[0-9]+)/visits/', 'api-origin-visits')
@api_doc('/origin/visits/')
def api_origin_visits(request, origin_id):
    """
    .. http:get:: /api/1/origin/(origin_id)/visits/

        Get information about all visits of a software origin.
        Visits are returned sorted in descending order according
        to their date.

        :param int origin_id: a software origin identifier
        :query int per_page: specify the number of visits to list, for pagination purposes
        :query int last_visit: visit to start listing from, for pagination purposes

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request
        :resheader Link: indicates that a subsequent result page is available and contains
            the url pointing to it

        :>jsonarr string date: ISO representation of the visit date (in UTC)
        :>jsonarr number origin: the unique identifier of the origin
        :>jsonarr string origin_visit_url: link to :http:get:`/api/1/origin/(origin_id)/visit/(visit_id)/`
            in order to get information about the visit
        :>jsonarr string snapshot: the snapshot identifier of the visit
        :>jsonarr string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/`
            in order to get information about the snapshot of the visit
        :>jsonarr string status: status of the visit (either **full**, **partial** or **ongoing**)
        :>jsonarr number visit: the unique identifier of the visit

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1/visits/`
    """ # noqa
    result = {}
    per_page = int(request.query_params.get('per_page', '10'))
    last_visit = request.query_params.get('last_visit')
    if last_visit:
        last_visit = int(last_visit)

    def _lookup_origin_visits(
            origin_id, last_visit=last_visit, per_page=per_page):
        # Generator: the visits are only fetched once api_lookup iterates.
        all_visits = get_origin_visits({'id': origin_id})
        # Reverse in place so that the page is served in the opposite
        # order to the one returned by get_origin_visits.
        all_visits.reverse()
        visits = []
        if not last_visit:
            # First page.
            visits = all_visits[:per_page]
        else:
            # Resume right after the visit the previous page ended on;
            # an unknown last_visit yields an empty page.
            for i, v in enumerate(all_visits):
                if v['visit'] == last_visit:
                    visits = all_visits[i+1:i+1+per_page]
                    break
        yield from visits

    def _enrich_origin_visit(origin_visit):
        # Work on a copy and add the links to the visit and its snapshot.
        ov = origin_visit.copy()
        ov['origin_visit_url'] = reverse('api-origin-visit',
                                         url_args={'origin_id': origin_id,
                                                   'visit_id': ov['visit']})
        snapshot = ov['snapshot']
        if snapshot:
            ov['snapshot_url'] = reverse('api-snapshot',
                                         url_args={'snapshot_id': snapshot})
        else:
            ov['snapshot_url'] = None
        return ov

    results = api_lookup(_lookup_origin_visits, origin_id,
                         notfound_msg='No origin {} found'.format(origin_id),
                         enrich_fn=_enrich_origin_visit)

    # A full page suggests older visits may remain: advertise the next page,
    # anchored on the last visit just returned.
    if results and len(results) == per_page:
        query_params = {'last_visit': results[-1]['visit']}
        # Only echo per_page when the client explicitly asked for it.
        if request.query_params.get('per_page'):
            query_params['per_page'] = per_page

        result['headers'] = {
            'link-next': reverse('api-origin-visits',
                                 url_args={'origin_id': origin_id},
                                 query_params=query_params)
        }

    result.update({
        'results': results
    })

    return result
@api_route(r'/origin/(?P<origin_id>[0-9]+)/visit/(?P<visit_id>[0-9]+)/',
           'api-origin-visit')
@api_doc('/origin/visit/')
def api_origin_visit(request, origin_id, visit_id):
    """
    .. http:get:: /api/1/origin/(origin_id)/visit/(visit_id)/

        Get information about a specific visit of a software origin.

        :param int origin_id: a software origin identifier
        :param int visit_id: a visit identifier

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        :>json string date: ISO representation of the visit date (in UTC)
        :>json number origin: the origin unique identifier
        :>json string origin_url: link to get information about the origin
        :>json string snapshot: the snapshot identifier of the visit
        :>json string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/`
            in order to get information about the snapshot of the visit
        :>json string status: status of the visit (either **full**, **partial** or **ongoing**)
        :>json number visit: the unique identifier of the visit

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 404: requested origin or visit can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/1500/visit/1/`
    """ # noqa
    def _enrich_origin_visit(origin_visit):
        # Work on a copy and add the links to the origin and the snapshot.
        ov = origin_visit.copy()
        ov['origin_url'] = reverse('api-origin',
                                   url_args={'origin_id': ov['origin']})
        snapshot = ov['snapshot']
        # A visit without a snapshot still exposes the key, set to None.
        ov['snapshot_url'] = (
            reverse('api-snapshot', url_args={'snapshot_id': snapshot})
            if snapshot else None)
        return ov

    return api_lookup(
        service.lookup_origin_visit, origin_id, visit_id,
        notfound_msg=('No visit {} for origin {} found'
                      .format(visit_id, origin_id)),
        enrich_fn=_enrich_origin_visit)
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
index 3f73b4621..0e883fbb5 100644
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -1,341 +1,352 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from rest_framework.test import APITestCase
from unittest.mock import patch
from swh.storage.exc import StorageDBError, StorageAPIError
from swh.web.tests.testcase import SWHWebTestCase
class OriginApiTestCase(SWHWebTestCase, APITestCase):
    """Tests for the origin endpoints of the web API.

    Covers origin lookup, visit listing, single-visit lookup and
    metadata search, with the service and storage layers mocked.
    """

    def setUp(self):
        # Reference fixtures reused by several tests below.
        self.origin_visit1 = dict(
            date=1104616800.0,
            origin=10,
            visit=100,
            metadata=None,
            status='full',
        )

        self.origin1 = dict(
            id=1234,
            url='ftp://some/url/to/origin/0',
            type='ftp',
        )

    @patch('swh.web.api.views.origin.get_origin_visits')
    def test_api_1_lookup_origin_visits_raise_error(
        self, mock_get_origin_visits,
    ):
        # given: the visits helper fails with a plain ValueError
        failure = ValueError(
            'voluntary error to check the bad request middleware.')
        mock_get_origin_visits.side_effect = failure

        # when
        resp = self.client.get('/api/1/origin/2/visits/')

        # then: reported as a bad request
        self.assertEqual(resp.status_code, 400)
        self.assertEqual(resp['Content-Type'], 'application/json')
        expected = {
            'exception': 'ValueError',
            'reason': 'voluntary error to check the bad request middleware.'}
        self.assertEqual(resp.data, expected)

    @patch('swh.web.common.utils.service')
    def test_api_1_lookup_origin_visits_raise_swh_storage_error_db(
            self, mock_service):
        # given: the storage database is unavailable
        failure = StorageDBError(
            'Storage exploded! Will be back online shortly!')
        mock_service.lookup_origin_visits.side_effect = failure

        # when
        resp = self.client.get('/api/1/origin/2/visits/')

        # then: reported as service unavailable
        self.assertEqual(resp.status_code, 503)
        self.assertEqual(resp['Content-Type'], 'application/json')
        expected = {
            'exception': 'StorageDBError',
            'reason':
            'An unexpected error occurred in the backend: '
            'Storage exploded! Will be back online shortly!'}
        self.assertEqual(resp.data, expected)

    @patch('swh.web.common.utils.service')
    def test_api_1_lookup_origin_visits_raise_swh_storage_error_api(
            self, mock_service):
        # given: the storage API is unavailable
        failure = StorageAPIError(
            'Storage API dropped dead! Will resurrect from its ashes asap!'
        )
        mock_service.lookup_origin_visits.side_effect = failure

        # when
        resp = self.client.get('/api/1/origin/2/visits/')

        # then: reported as service unavailable
        self.assertEqual(resp.status_code, 503)
        self.assertEqual(resp['Content-Type'], 'application/json')
        expected = {
            'exception': 'StorageAPIError',
            'reason':
            'An unexpected error occurred in the api backend: '
            'Storage API dropped dead! Will resurrect from its ashes asap!'
        }
        self.assertEqual(resp.data, expected)

    @patch('swh.web.api.views.origin.get_origin_visits')
    def test_api_1_lookup_origin_visits(self, mock_get_origin_visits):
        # given: four visits, listed from visit 1 to visit 4
        stub_visits = [
            {'date': date, 'origin': 2, 'snapshot': snapshot, 'visit': visit}
            for visit, date, snapshot in (
                (1, 1293919200.0, '1234'),
                (2, 1293919200.0, '1234'),
                (3, 1420149600.0, '5678'),
                (4, 1420149600.0, '5678'),
            )
        ]
        mock_get_origin_visits.return_value = stub_visits

        # when: asking for the two visits that follow visit 3
        resp = self.client.get(
            '/api/1/origin/2/visits/?per_page=2&last_visit=3')

        # then: visits 2 and 1 come back, enriched with their links
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp['Content-Type'], 'application/json')
        expected = [
            {
                'date': 1293919200.0,
                'origin': 2,
                'snapshot': '1234',
                'visit': 2,
                'origin_visit_url': '/api/1/origin/2/visit/2/',
                'snapshot_url': '/api/1/snapshot/1234/'
            },
            {
                'date': 1293919200.0,
                'origin': 2,
                'snapshot': '1234',
                'visit': 1,
                'origin_visit_url': '/api/1/origin/2/visit/1/',
                'snapshot_url': '/api/1/snapshot/1234/'
            },
        ]
        self.assertEqual(resp.data, expected)

    @patch('swh.web.api.views.origin.service')
    def test_api_1_lookup_origin_visit(self, mock_service):
        # given
        origin_visit = dict(self.origin_visit1, snapshot='57478754')
        mock_service.lookup_origin_visit.return_value = origin_visit

        expected_origin_visit = dict(
            self.origin_visit1,
            origin_url='/api/1/origin/10/',
            snapshot='57478754',
            snapshot_url='/api/1/snapshot/57478754/',
        )

        # when
        resp = self.client.get('/api/1/origin/10/visit/100/')

        # then
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, expected_origin_visit)

        mock_service.lookup_origin_visit.assert_called_once_with('10', '100')

    @patch('swh.web.api.views.origin.service')
    def test_api_1_lookup_origin_visit_not_found(self, mock_service):
        # given: the service knows no such visit
        mock_service.lookup_origin_visit.return_value = None

        # when
        resp = self.client.get('/api/1/origin/1/visit/1000/')

        # then
        self.assertEqual(resp.status_code, 404)
        self.assertEqual(resp['Content-Type'], 'application/json')
        expected = {
            'exception': 'NotFoundExc',
            'reason': 'No visit 1000 for origin 1 found'
        }
        self.assertEqual(resp.data, expected)

        mock_service.lookup_origin_visit.assert_called_once_with('1', '1000')

    @patch('swh.web.api.views.origin.service')
    def test_api_origin_by_id(self, mock_service):
        # given
        mock_service.lookup_origin.return_value = self.origin1

        expected_origin = dict(
            self.origin1,
            origin_visits_url='/api/1/origin/1234/visits/',
        )

        # when
        resp = self.client.get('/api/1/origin/1234/')

        # then
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, expected_origin)

        mock_service.lookup_origin.assert_called_with({'id': '1234'})

    @patch('swh.web.api.views.origin.service')
    def test_api_origin_by_type_url(self, mock_service):
        # given
        stub_origin = dict(self.origin1, id=987)
        mock_service.lookup_origin.return_value = stub_origin

        expected_origin = dict(
            stub_origin,
            origin_visits_url='/api/1/origin/987/visits/',
        )

        # when
        resp = self.client.get('/api/1/origin/ftp/url'
                               '/ftp://some/url/to/origin/0/')

        # then
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp['Content-Type'], 'application/json')
        self.assertEqual(resp.data, expected_origin)

        mock_service.lookup_origin.assert_called_with(
            {'url': 'ftp://some/url/to/origin/0',
             'type': 'ftp'})

    @patch('swh.web.api.views.origin.service')
    def test_api_origin_not_found(self, mock_service):
        # given: the service knows no such origin
        mock_service.lookup_origin.return_value = None

        # when
        resp = self.client.get('/api/1/origin/4321/')

        # then
        self.assertEqual(resp.status_code, 404)
        self.assertEqual(resp['Content-Type'], 'application/json')
        expected = {
            'exception': 'NotFoundExc',
            'reason': 'Origin with id 4321 not found.'
        }
        self.assertEqual(resp.data, expected)

        mock_service.lookup_origin.assert_called_with({'id': '4321'})

    @patch('swh.web.common.service.idx_storage')
    def test_api_origin_metadata_search(self, mock_idx_storage):
        # given: the indexer storage returns one match, with a raw
        # (bytes) revision identifier
        search = mock_idx_storage.origin_intrinsic_metadata_search_fulltext
        search.return_value = [{
            'from_revision':
            b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
            'metadata': {'author': 'Jane Doe'},
            'origin_id': 54974445,
            'tool': {
                'configuration': {
                    'context': ['NpmMapping', 'CodemetaMapping'],
                    'type': 'local'
                },
                'id': 3,
                'name': 'swh-metadata-detector',
                'version': '0.0.1'
            }
        }]

        # when
        resp = self.client.get(
            '/api/1/origin/metadata-search/?fulltext=Jane%20Doe')

        # then: same record, with the revision rendered as hexadecimal
        self.assertEqual(resp.status_code, 200, resp.content)
        self.assertEqual(resp['Content-Type'], 'application/json')
        expected_data = [{
            'origin_id': 54974445,
            'metadata': {'author': 'Jane Doe'},
            'from_revision': '7026b7c1a2af56521e951c01ed20f255fa054238',
            'tool': {
                'configuration': {
                    'context': ['NpmMapping', 'CodemetaMapping'],
                    'type': 'local'
                },
                'id': 3,
                'name': 'swh-metadata-detector',
                'version': '0.0.1',
            }
        }]
        self.assertEqual(resp.data, expected_data)
        search.assert_called_with(conjunction=['Jane Doe'], limit=70)

    @patch('swh.web.common.service.idx_storage')
    def test_api_origin_metadata_search_limit(self, mock_idx_storage):
        # given
        search = mock_idx_storage.origin_intrinsic_metadata_search_fulltext
        search.return_value = [{
            'from_revision':
            b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
            'metadata': {'author': 'Jane Doe'},
            'origin_id': 54974445,
            'tool': {
                'configuration': {
                    'context': ['NpmMapping', 'CodemetaMapping'],
                    'type': 'local'
                },
                'id': 3,
                'name': 'swh-metadata-detector',
                'version': '0.0.1'
            }
        }]

        base_url = '/api/1/origin/metadata-search/?fulltext=Jane%20Doe'
        # (extra query string, limit expected to reach the storage):
        # default limit, explicit limit, and a limit above the cap.
        scenarios = (
            ('', 70),
            ('&limit=10', 10),
            ('&limit=987', 100),
        )

        for extra_query, expected_limit in scenarios:
            # when
            resp = self.client.get(base_url + extra_query)

            # then
            self.assertEqual(resp.status_code, 200, resp.content)
            self.assertEqual(resp['Content-Type'], 'application/json')
            self.assertEqual(len(resp.data), 1)
            search.assert_called_with(
                conjunction=['Jane Doe'], limit=expected_limit)

    @patch('swh.web.common.service.idx_storage')
    def test_api_origin_metadata_search_invalid(self, mock_idx_storage):
        # when: no fulltext criterion is supplied
        resp = self.client.get('/api/1/origin/metadata-search/')

        # then: rejected before reaching the indexer storage
        self.assertEqual(resp.status_code, 400, resp.content)
        mock_idx_storage.assert_not_called()

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 12:40 PM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3236800

Event Timeline