diff --git a/docs/uri-scheme-api-identifiers.rst b/docs/uri-scheme-api-identifiers.rst
index 4d8c40a6..8beeb92f 100644
--- a/docs/uri-scheme-api-identifiers.rst
+++ b/docs/uri-scheme-api-identifiers.rst
@@ -1,4 +1,7 @@
 Persistent identifiers
 ----------------------
 
 .. autosimple:: swh.web.api.views.identifiers.api_resolve_swh_pid
+
+.. autosimple:: swh.web.api.views.identifiers.api_swh_pid_known
+
diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py
index a29c12c5..28fdfcc5 100644
--- a/swh/web/api/views/identifiers.py
+++ b/swh/web/api/views/identifiers.py
@@ -1,105 +1,120 @@
 # Copyright (C) 2018-2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from swh.web.common import service, utils
 from swh.web.common.utils import (
     resolve_swh_persistent_id,
     get_persistent_identifier
 )
 from swh.web.api.apidoc import api_doc, format_docstring
 from swh.web.api.apiurls import api_route
 from swh.web.common.exc import LargePayloadExc
 
 
 @api_route(r'/resolve/(?P<swh_id>.*)/',
            'api-1-resolve-swh-pid')
 @api_doc('/resolve/')
 @format_docstring()
 def api_resolve_swh_pid(request, swh_id):
     """
     .. http:get:: /api/1/resolve/(swh_id)/
 
         Resolve a Software Heritage persistent identifier.
 
-        Try to resolve a provided `persistent identifier <https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html>`_
+        Try to resolve a provided `persistent identifier
+        <https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html>`_
         into an url for browsing the pointed archive object. If the provided
         identifier is valid, the existence of the object in the archive
         will also be checked.
 
         :param string swh_id: a Software Heritage persistent identifier
 
         :>json string browse_url: the url for browsing the pointed object
-        :>json object metadata: object holding optional parts of the persistent identifier
+        :>json object metadata: object holding optional parts of the
+            persistent identifier
         :>json string namespace: the persistent identifier namespace
         :>json string object_id: the hash identifier of the pointed object
         :>json string object_type: the type of the pointed object
-        :>json number scheme_version: the scheme version of the persistent identifier
+        :>json number scheme_version: the scheme version of the persistent
+            identifier
 
         {common_headers}
 
         :statuscode 200: no error
         :statuscode 400: an invalid persistent identifier has been provided
         :statuscode 404: the pointed object does not exist in the archive
 
         **Example:**
 
         .. parsed-literal::
 
             :swh_web_api:`resolve/swh:1:rev:96db9023b881d7cd9f379b0c154650d6c108e9a3;origin=https://github.com/openssl/openssl/`
-    """ # noqa
+    """
     # try to resolve the provided pid
     swh_id_resolved = resolve_swh_persistent_id(swh_id)
     # id is well-formed, now check that the pointed
     # object is present in the archive, NotFoundExc
     # will be raised otherwise
     swh_id_parsed = swh_id_resolved['swh_id_parsed']
     object_type = swh_id_parsed.object_type
     object_id = swh_id_parsed.object_id
     service.lookup_object(object_type, object_id)
     # id is well-formed and the pointed object exists
     swh_id_data = swh_id_parsed._asdict()
     swh_id_data['browse_url'] = request.build_absolute_uri(
         swh_id_resolved['browse_url'])
     return swh_id_data
 
 
 @api_route(r'/known/',
-           'api-1-swh-pid-known', methods=['POST'])
-@api_doc('/known/', tags=['hidden'])
+           'api-1-known', methods=['POST'])
+@api_doc('/known/')
 @format_docstring()
 def api_swh_pid_known(request):
     """
     .. http:post:: /api/1/known/
 
-        Check if a list of Software Heritage persistent identifier is present
-        in the archive depending on their id (sha1_git).
+        Check if a list of objects is present in the Software Heritage
+        archive.
+
+        The objects to look up must be provided as Software Heritage
+        `persistent identifiers
+        <https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html>`_.
+
+        :<jsonarr string -: input array of Software Heritage persistent
+            identifiers, its length cannot exceed 1000.
+
+        :>json object <swh_pid>: an object whose keys are input persistent
+            identifiers and values objects with the following keys:
 
-        Returns:
-            A dictionary with:
-                keys(str): Persistent identifier
-                values(dict): A dictionary containing the key 'known'. (true if
-                    the pid is present, False otherwise)
+                * **known (bool)**: whether the object was found
+
+        {common_headers}
+
+        :statuscode 200: no error
+        :statuscode 400: an invalid persistent identifier was provided
+        :statuscode 413: the input array of persistent identifiers is too large
 
     """
     limit = 1000
     if len(request.data) > limit:
         raise LargePayloadExc('The maximum number of PIDs this endpoint can '
                               'receive is %s' % limit)
 
     persistent_ids = [get_persistent_identifier(pid)
                       for pid in request.data]
 
     response = {str(pid): {'known': False} for pid in persistent_ids}
 
     # group pids by their type
     pids_by_type = utils.group_swh_persistent_identifiers(persistent_ids)
     # search for hashes not present in the storage
     missing_hashes = service.lookup_missing_hashes(pids_by_type)
 
     for pid in persistent_ids:
         if pid.object_id not in missing_hashes:
             response[str(pid)]['known'] = True
 
     return response
diff --git a/swh/web/tests/api/views/test_identifiers.py b/swh/web/tests/api/views/test_identifiers.py
index 12e7a90b..cc200d2b 100644
--- a/swh/web/tests/api/views/test_identifiers.py
+++ b/swh/web/tests/api/views/test_identifiers.py
@@ -1,161 +1,161 @@
 # Copyright (C) 2018-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from hypothesis import given
 
 from swh.model.identifiers import (
     CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
 )
 
 from swh.web.common.utils import reverse
 from swh.web.tests.data import random_sha1
 from swh.web.tests.strategies import (
     content, directory, origin, release, revision, snapshot,
     unknown_content, unknown_directory, unknown_release,
     unknown_revision, unknown_snapshot
 )
 
 
 @given(origin(), content(), directory(), release(), revision(), snapshot())
 def test_swh_id_resolve_success(api_client, origin, content, directory,
                                 release, revision, snapshot):
 
     for obj_type_short, obj_type, obj_id in (
             ('cnt', CONTENT, content['sha1_git']),
             ('dir', DIRECTORY, directory),
             ('rel', RELEASE, release),
             ('rev', REVISION, revision),
             ('snp', SNAPSHOT, snapshot)):
 
         swh_id = 'swh:1:%s:%s;origin=%s' % (obj_type_short, obj_id,
                                             origin['url'])
         url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id})
 
         resp = api_client.get(url)
 
         if obj_type == CONTENT:
             url_args = {'query_string': 'sha1_git:%s' % obj_id}
         elif obj_type == SNAPSHOT:
             url_args = {'snapshot_id': obj_id}
         else:
             url_args = {'sha1_git': obj_id}
 
         browse_rev_url = reverse('browse-%s' % obj_type,
                                  url_args=url_args,
                                  query_params={'origin': origin['url']},
                                  request=resp.wsgi_request)
 
         expected_result = {
             'browse_url': browse_rev_url,
             'metadata': {'origin': origin['url']},
             'namespace': 'swh',
             'object_id': obj_id,
             'object_type': obj_type,
             'scheme_version': 1
         }
 
         assert resp.status_code == 200, resp.data
         assert resp.data == expected_result
 
 
 def test_swh_id_resolve_invalid(api_client):
     rev_id_invalid = '96db9023b8_foo_50d6c108e9a3'
     swh_id = 'swh:1:rev:%s' % rev_id_invalid
     url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id})
 
     resp = api_client.get(url)
 
     assert resp.status_code == 400, resp.data
 
 
 @given(unknown_content(), unknown_directory(), unknown_release(),
        unknown_revision(), unknown_snapshot())
 def test_swh_id_resolve_not_found(api_client, unknown_content,
                                   unknown_directory, unknown_release,
                                   unknown_revision, unknown_snapshot):
 
     for obj_type_short, obj_id in (('cnt', unknown_content['sha1_git']),
                                    ('dir', unknown_directory),
                                    ('rel', unknown_release),
                                    ('rev', unknown_revision),
                                    ('snp', unknown_snapshot)):
 
         swh_id = 'swh:1:%s:%s' % (obj_type_short, obj_id)
 
         url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id})
 
         resp = api_client.get(url)
 
         assert resp.status_code == 404, resp.data
 
 
 def test_swh_origin_id_not_resolvable(api_client):
     ori_pid = 'swh:1:ori:8068d0075010b590762c6cb5682ed53cb3c13deb'
     url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': ori_pid})
     resp = api_client.get(url)
     assert resp.status_code == 400, resp.data
 
 
 @given(content(), directory())
 def test_api_known_swhpid_some_present(api_client, content, directory):
     content_ = 'swh:1:cnt:%s' % content['sha1_git']
     directory_ = 'swh:1:dir:%s' % directory
     unknown_revision_ = 'swh:1:rev:%s' % random_sha1()
     unknown_release_ = 'swh:1:rel:%s' % random_sha1()
     unknown_snapshot_ = 'swh:1:snp:%s' % random_sha1()
 
     input_pids = [content_, directory_, unknown_revision_,
                   unknown_release_, unknown_snapshot_]
 
-    url = reverse('api-1-swh-pid-known')
+    url = reverse('api-1-known')
 
     resp = api_client.post(url, data=input_pids, format='json',
                            HTTP_ACCEPT='application/json')
 
     assert resp.status_code == 200, resp.data
     assert resp['Content-Type'] == 'application/json'
     assert resp.data == {
         content_: {'known': True},
         directory_: {'known': True},
         unknown_revision_: {'known': False},
         unknown_release_: {'known': False},
         unknown_snapshot_: {'known': False}
     }
 
 
 def test_api_known_invalid_swhpid(api_client):
     invalid_pid_sha1 = ['swh:1:cnt:8068d0075010b590762c6cb5682ed53cb3c13de;']
     invalid_pid_type = ['swh:1:cnn:8068d0075010b590762c6cb5682ed53cb3c13deb']
 
-    url = reverse('api-1-swh-pid-known')
+    url = reverse('api-1-known')
 
     resp = api_client.post(url, data=invalid_pid_sha1, format='json',
                            HTTP_ACCEPT='application/json')
 
     assert resp.status_code == 400, resp.data
 
     resp2 = api_client.post(url, data=invalid_pid_type, format='json',
                             HTTP_ACCEPT='application/json')
 
-    assert resp2.status_code == 400, resp.data
+    assert resp2.status_code == 400, resp2.data
 
 
 def test_api_known_raises_large_payload_error(api_client):
     random_pid = 'swh:1:cnt:8068d0075010b590762c6cb5682ed53cb3c13deb'
     limit = 10000
     err_msg = 'The maximum number of PIDs this endpoint can receive is 1000'
 
     pids = [random_pid for i in range(limit)]
 
-    url = reverse('api-1-swh-pid-known')
+    url = reverse('api-1-known')
     resp = api_client.post(url, data=pids, format='json',
                            HTTP_ACCEPT='application/json')
 
     assert resp.status_code == 413, resp.data
     assert resp['Content-Type'] == 'application/json'
     assert resp.data == {
         'exception': 'LargePayloadExc',
         'reason': err_msg
     }