diff --git a/swh/web/api/views/vault.py b/swh/web/api/views/vault.py index 9220c26f..df5eabc0 100644 --- a/swh/web/api/views/vault.py +++ b/swh/web/api/views/vault.py @@ -1,272 +1,287 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Any, Dict + from django.http import HttpResponse from django.shortcuts import redirect from django.views.decorators.cache import never_cache from swh.model import hashutil from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup from swh.web.common import archive, query from swh.web.common.utils import reverse # XXX: a bit spaghetti. Would be better with class-based views. def _dispatch_cook_progress(request, obj_type, obj_id): hex_id = hashutil.hash_to_hex(obj_id) object_name = obj_type.split("_")[0] if request.method == "GET": return api_lookup( archive.vault_progress, obj_type, obj_id, notfound_msg=( "Cooking of {} '{}' was never requested.".format(object_name, hex_id) ), request=request, ) elif request.method == "POST": email = request.POST.get("email", request.GET.get("email", None)) return api_lookup( archive.vault_cook, obj_type, obj_id, email, notfound_msg=("{} '{}' not found.".format(object_name.title(), hex_id)), request=request, ) +def _vault_response(vault_response: Dict[str, Any]) -> Dict[str, Any]: + return { + "fetch_url": vault_response["fetch_url"], + "obj_type": vault_response["type"], + "progress_message": vault_response["progress_msg"], + "id": vault_response["task_id"], + "status": vault_response["task_status"], + "obj_id": vault_response["object_id"], + } + + @never_cache @api_route( r"/vault/directory/(?P[0-9a-f]+)/", "api-1-vault-cook-directory", methods=["GET", "POST"], checksum_args=["dir_id"], throttle_scope="swh_vault_cooking", ) @api_doc("/vault/directory/") @format_docstring() def api_vault_cook_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/ .. http:post:: /api/1/vault/directory/(dir_id)/ Request the cooking of an archive for a directory or check its cooking status. That endpoint enables to create a vault cooking task for a directory through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/directory/(dir_id)/raw/`. Then to extract the cooked directory in the current one, use:: $ tar xvf path/to/directory.tar.gz :param string dir_id: the directory's sha1 identifier :query string email: e-mail to notify when the archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/directory/(dir_id)/raw/`) :>json string obj_type: the type of object to cook (directory or revision) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (either **new**, **pending**, **done** or **failed**) :>json string obj_id: the identifier of the object to cook :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ["sha1"], "Only sha1_git is supported." ) res = _dispatch_cook_progress(request, "directory", obj_id) res["fetch_url"] = reverse( - "api-1-vault-fetch-directory", url_args={"dir_id": dir_id} + "api-1-vault-fetch-directory", url_args={"dir_id": dir_id}, request=request, ) - return res + return _vault_response(res) @api_route( r"/vault/directory/(?P[0-9a-f]+)/raw/", "api-1-vault-fetch-directory", checksum_args=["dir_id"], ) @api_doc("/vault/directory/raw/") def api_vault_fetch_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/raw/ Fetch the cooked archive for a directory. See :http:get:`/api/1/vault/directory/(dir_id)/` to get more details on directory cooking. :param string dir_id: the directory's sha1 identifier :resheader Content-Type: application/octet-stream :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ["sha1"], "Only sha1_git is supported." ) res = api_lookup( archive.vault_fetch, "directory", obj_id, notfound_msg="Cooked archive for directory '{}' not found.".format(dir_id), request=request, ) fname = "{}.tar.gz".format(dir_id) response = HttpResponse(res, content_type="application/gzip") response["Content-disposition"] = "attachment; filename={}".format(fname) return response @never_cache @api_route( r"/vault/revision/(?P[0-9a-f]+)/gitfast/", "api-1-vault-cook-revision_gitfast", methods=["GET", "POST"], checksum_args=["rev_id"], throttle_scope="swh_vault_cooking", ) @api_doc("/vault/revision/gitfast/") @format_docstring() def api_vault_cook_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/ .. http:post:: /api/1/vault/revision/(rev_id)/gitfast/ Request the cooking of a gitfast archive for a revision or check its cooking status. That endpoint enables to create a vault cooking task for a revision through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting gitfast archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`. Then to import the revision in the current directory, use:: $ git init $ zcat path/to/revision.gitfast.gz | git fast-import $ git checkout HEAD :param string rev_id: the revision's sha1 identifier :query string email: e-mail to notify when the gitfast archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`) :>json string obj_type: the type of object to cook (directory or revision) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (new/pending/done/failed) :>json string obj_id: the identifier of the object to cook :statuscode 200: no error :statuscode 400: an invalid revision identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ["sha1"], "Only sha1_git is supported." ) res = _dispatch_cook_progress(request, "revision_gitfast", obj_id) res["fetch_url"] = reverse( - "api-1-vault-fetch-revision_gitfast", url_args={"rev_id": rev_id} + "api-1-vault-fetch-revision_gitfast", + url_args={"rev_id": rev_id}, + request=request, ) - return res + return _vault_response(res) @api_route( r"/vault/revision/(?P[0-9a-f]+)/gitfast/raw/", "api-1-vault-fetch-revision_gitfast", checksum_args=["rev_id"], ) @api_doc("/vault/revision/gitfast/raw/") def api_vault_fetch_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/raw/ Fetch the cooked gitfast archive for a revision. See :http:get:`/api/1/vault/revision/(rev_id)/gitfast/` to get more details on directory cooking. :param string rev_id: the revision's sha1 identifier :resheader Content-Type: application/octet-stream :statuscode 200: no error :statuscode 400: an invalid revision identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ["sha1"], "Only sha1_git is supported." ) res = api_lookup( archive.vault_fetch, "revision_gitfast", obj_id, notfound_msg="Cooked archive for revision '{}' not found.".format(rev_id), request=request, ) fname = "{}.gitfast.gz".format(rev_id) response = HttpResponse(res, content_type="application/gzip") response["Content-disposition"] = "attachment; filename={}".format(fname) return response @api_route( r"/vault/revision_gitfast/(?P[0-9a-f]+)/raw/", "api-1-vault-revision_gitfast-raw", checksum_args=["rev_id"], ) @api_doc("/vault/revision_gitfast/raw/", tags=["hidden"]) def _api_vault_revision_gitfast_raw(request, rev_id): """ The vault backend sends an email containing an invalid url to fetch a gitfast archive. So setup a redirection to the correct one as a temporary workaround. """ rev_gitfast_raw_url = reverse( "api-1-vault-fetch-revision_gitfast", url_args={"rev_id": rev_id} ) return redirect(rev_gitfast_raw_url) diff --git a/swh/web/tests/api/views/test_vault.py b/swh/web/tests/api/views/test_vault.py index d1d4da13..870029b3 100644 --- a/swh/web/tests/api/views/test_vault.py +++ b/swh/web/tests/api/views/test_vault.py @@ -1,167 +1,168 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from hypothesis import given from swh.model import hashutil from swh.vault.exc import NotFoundExc from swh.web.common.utils import reverse from swh.web.tests.strategies import ( directory, revision, unknown_directory, unknown_revision, ) from swh.web.tests.utils import ( check_api_get_responses, check_api_post_responses, check_http_get_response, check_http_post_response, ) @given(directory(), revision()) def test_api_vault_cook(api_client, mocker, directory, revision): mock_archive = mocker.patch("swh.web.api.views.vault.archive") for obj_type, obj_id in ( ("directory", directory), ("revision_gitfast", revision), ): fetch_url = reverse( f"api-1-vault-fetch-{obj_type}", url_args={f"{obj_type[:3]}_id": obj_id}, ) stub_cook = { - "fetch_url": fetch_url, - "obj_id": obj_id, - "obj_type": obj_type, - "progress_message": None, - "status": "done", - "task_uuid": "de75c902-5ee5-4739-996e-448376a93eff", + "type": obj_type, + "progress_msg": None, + "task_id": 1, + "task_status": "done", + "object_id": obj_id, } stub_fetch = b"content" mock_archive.vault_cook.return_value = stub_cook mock_archive.vault_fetch.return_value = stub_fetch email = "test@test.mail" url = reverse( f"api-1-vault-cook-{obj_type}", url_args={f"{obj_type[:3]}_id": obj_id}, query_params={"email": email}, ) rv = check_api_post_responses(api_client, url, data=None, status_code=200) - - stub_cook["fetch_url"] = rv.wsgi_request.build_absolute_uri( - stub_cook["fetch_url"] - ) - - assert rv.data == stub_cook + assert rv.data == { + "fetch_url": rv.wsgi_request.build_absolute_uri(fetch_url), + "obj_type": obj_type, + "progress_message": None, + "id": 1, + "status": "done", + "obj_id": obj_id, + } mock_archive.vault_cook.assert_called_with( obj_type, hashutil.hash_to_bytes(obj_id), email ) rv = check_http_get_response(api_client, fetch_url, status_code=200) assert rv["Content-Type"] == "application/gzip" assert rv.content == stub_fetch mock_archive.vault_fetch.assert_called_with( obj_type, hashutil.hash_to_bytes(obj_id) ) @given(directory(), revision()) def test_api_vault_cook_uppercase_hash(api_client, directory, revision): for obj_type, obj_id in ( ("directory", directory), ("revision_gitfast", revision), ): url = reverse( f"api-1-vault-cook-{obj_type}-uppercase-checksum", url_args={f"{obj_type[:3]}_id": obj_id.upper()}, ) rv = check_http_post_response( api_client, url, data={"email": "test@test.mail"}, status_code=302 ) redirect_url = reverse( f"api-1-vault-cook-{obj_type}", url_args={f"{obj_type[:3]}_id": obj_id} ) assert rv["location"] == redirect_url fetch_url = reverse( f"api-1-vault-fetch-{obj_type}-uppercase-checksum", url_args={f"{obj_type[:3]}_id": obj_id.upper()}, ) rv = check_http_get_response(api_client, fetch_url, status_code=302) redirect_url = reverse( f"api-1-vault-fetch-{obj_type}", url_args={f"{obj_type[:3]}_id": obj_id}, ) assert rv["location"] == redirect_url @given(directory(), revision(), unknown_directory(), unknown_revision()) def test_api_vault_cook_notfound( api_client, mocker, directory, revision, unknown_directory, unknown_revision ): mock_vault = mocker.patch("swh.web.common.archive.vault") mock_vault.cook.side_effect = NotFoundExc("object not found") mock_vault.fetch.side_effect = NotFoundExc("cooked archive not found") mock_vault.progress.side_effect = NotFoundExc("cooking request not found") for obj_type, obj_id in ( ("directory", directory), ("revision_gitfast", revision), ): obj_name = obj_type.split("_")[0] url = reverse( f"api-1-vault-cook-{obj_type}", url_args={f"{obj_type[:3]}_id": obj_id}, ) rv = check_api_get_responses(api_client, url, status_code=404) assert rv.data["exception"] == "NotFoundExc" assert ( rv.data["reason"] == f"Cooking of {obj_name} '{obj_id}' was never requested." ) mock_vault.progress.assert_called_with(obj_type, hashutil.hash_to_bytes(obj_id)) for obj_type, obj_id in ( ("directory", unknown_directory), ("revision_gitfast", unknown_revision), ): obj_name = obj_type.split("_")[0] url = reverse( f"api-1-vault-cook-{obj_type}", url_args={f"{obj_type[:3]}_id": obj_id} ) rv = check_api_post_responses(api_client, url, data=None, status_code=404) assert rv.data["exception"] == "NotFoundExc" assert rv.data["reason"] == f"{obj_name.title()} '{obj_id}' not found." mock_vault.cook.assert_called_with( obj_type, hashutil.hash_to_bytes(obj_id), email=None ) fetch_url = reverse( f"api-1-vault-fetch-{obj_type}", url_args={f"{obj_type[:3]}_id": obj_id}, ) rv = check_api_get_responses(api_client, fetch_url, status_code=404) assert rv.data["exception"] == "NotFoundExc" assert ( rv.data["reason"] == f"Cooked archive for {obj_name} '{obj_id}' not found." ) mock_vault.fetch.assert_called_with(obj_type, hashutil.hash_to_bytes(obj_id))