diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py index 85e8ef2b..d3e5450d 100644 --- a/swh/web/api/views/revision.py +++ b/swh/web/api/views/revision.py @@ -1,275 +1,238 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from swh.web.common import service -from swh.web.common.utils import reverse from swh.web.api import utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup DOC_RETURN_REVISION = """ :>json object author: information about the author of the revision :>json object committer: information about the committer of the revision :>json string committer_date: ISO representation of the commit date (in UTC) :>json string date: ISO representation of the revision date (in UTC) :>json string directory: the unique identifier that revision points to :>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>json string id: the revision unique identifier :>json boolean merge: whether or not the revision corresponds to a merge commit :>json string message: the message associated to the revision :>json array parents: the parents of the revision, i.e. 
the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>json string type: the type of the revision """ DOC_RETURN_REVISION_ARRAY = DOC_RETURN_REVISION.replace(":>json", ":>jsonarr") def _revision_directory_by(revision, path, request_path, limit=100, with_data=False): """ Compute the revision matching criterion's directory or content data. Args: revision: dictionary of criterions representing a revision to lookup path: directory's path to lookup request_path: request path which holds the original context to limit: optional query parameter to limit the revisions log (default to 100). For now, note that this limit could impede the transitivity conclusion about sha1_git not being an ancestor of with_data: indicate to retrieve the content's raw data if path resolves to a content. """ def enrich_directory_local(dir, context_url=request_path): return utils.enrich_directory(dir, context_url) rev_id, result = service.lookup_directory_through_revision( revision, path, limit=limit, with_data=with_data ) content = result["content"] if result["type"] == "dir": # dir_entries result["content"] = list(map(enrich_directory_local, content)) elif result["type"] == "file": # content result["content"] = utils.enrich_content(content) elif result["type"] == "rev": # revision result["content"] = utils.enrich_revision(content) return result @api_route( r"/revision/(?P<sha1_git>[0-9a-f]+)/", "api-1-revision", checksum_args=["sha1_git"] ) @api_doc("/revision/") @format_docstring(return_revision=DOC_RETURN_REVISION) def api_revision(request, sha1_git): """ .. http:get:: /api/1/revision/(sha1_git)/ Get information about a revision in the archive. Revisions are identified by **sha1** checksums, compatible with Git commit identifiers. 
See :func:`swh.model.identifiers.revision_identifier` in our data model module for details about how they are computed. :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier {common_headers} {return_revision} :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/` """ return api_lookup( service.lookup_revision, sha1_git, notfound_msg="Revision with sha1_git {} not found.".format(sha1_git), enrich_fn=utils.enrich_revision, request=request, ) @api_route( r"/revision/(?P<sha1_git>[0-9a-f]+)/raw/", "api-1-revision-raw-message", checksum_args=["sha1_git"], ) @api_doc("/revision/raw/", tags=["hidden"], handle_response=True) def api_revision_raw_message(request, sha1_git): """Return the raw data of the message of revision identified by sha1_git """ raw = service.lookup_revision_message(sha1_git) response = HttpResponse(raw["message"], content_type="application/octet-stream") response["Content-disposition"] = "attachment;filename=rev_%s_raw" % sha1_git return response @api_route( r"/revision/(?P<sha1_git>[0-9a-f]+)/directory/", "api-1-revision-directory", checksum_args=["sha1_git"], ) @api_route( r"/revision/(?P<sha1_git>[0-9a-f]+)/directory/(?P<dir_path>.+)/", "api-1-revision-directory", checksum_args=["sha1_git"], ) @api_doc("/revision/directory/") @format_docstring() def api_revision_directory(request, sha1_git, dir_path=None, with_data=False): """ .. http:get:: /api/1/revision/(sha1_git)/directory/[(path)/] Get information about directory (entry) objects associated to revisions. Each revision is associated to a single "root" directory. This endpoint behaves like :http:get:`/api/1/directory/(sha1_git)/[(path)/]`, but operates on the root directory associated to a given revision. 
:param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :param string path: optional parameter to get information about the directory entry pointed by that relative path {common_headers} :>json array content: directory entries as returned by :http:get:`/api/1/directory/(sha1_git)/[(path)/]` :>json string path: path of directory from the revision root one :>json string revision: the unique revision identifier :>json string type: the type of the directory :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/f1b94134a4b879bc55c3dacdb496690c8ebdc03f/directory/` """ return _revision_directory_by( {"sha1_git": sha1_git}, dir_path, request.path, with_data=with_data ) @api_route( r"/revision/(?P<sha1_git>[0-9a-f]+)/log/", "api-1-revision-log", checksum_args=["sha1_git"], ) -@api_route( - r"/revision/(?P<sha1_git>[0-9a-f]+)" r"/prev/(?P<prev_sha1s>[0-9a-f]*/*)/log/", - "api-1-revision-log", - checksum_args=["sha1_git", "prev_sha1s"], -) @api_doc("/revision/log/") @format_docstring(return_revision_array=DOC_RETURN_REVISION_ARRAY) -def api_revision_log(request, sha1_git, prev_sha1s=None): +def api_revision_log(request, sha1_git): """ - .. http:get:: /api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/ + .. http:get:: /api/1/revision/(sha1_git)/log/ Get a list of all revisions heading to a given one, in other words show the commit log. + The revisions are returned in the breadth-first search order while + visiting the revision graph. The number of revisions to return is also + bounded by the **limit** query parameter. + + .. warning:: + To get the full BFS traversal of the revision graph when the + total number of revisions is greater than 1000, it is up to + the client to keep track of the multiple branches of history + when there's merge revisions in the returned objects. 
+ In other words, identify all the continuation points that need + to be followed to get the full history through recursion. + :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier - :param string prev_sha1s: optional parameter representing the navigation - breadcrumbs (descendant revisions previously visited). If multiple values, - use / as delimiter. If provided, revisions information will be added at - the beginning of the returned list. - :query int per_page: number of elements in the returned list, for pagination - purpose + :query int limit: maximum number of revisions to return when performing + BFS traversal on the revision graph (default to 10, can not exceed 1000) {common_headers} - {resheader_link} {return_revision_array} :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided - :statuscode 404: requested revision can not be found in the archive + :statuscode 404: head revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/e1a315fa3fa734e2a6154ed7b5b9ae0eb8987aad/log/` """ - result = {} - per_page = int(request.query_params.get("per_page", "10")) - - def lookup_revision_log_with_limit(s, limit=per_page + 1): - return service.lookup_revision_log(s, limit) + limit = int(request.query_params.get("limit", "10")) + limit = min(limit, 1000) error_msg = "Revision with sha1_git %s not found." 
% sha1_git - rev_get = api_lookup( - lookup_revision_log_with_limit, + revisions = api_lookup( + service.lookup_revision_log, sha1_git, + limit, notfound_msg=error_msg, enrich_fn=utils.enrich_revision, request=request, ) - nb_rev = len(rev_get) - if nb_rev == per_page + 1: - rev_backward = rev_get[:-1] - new_last_sha1 = rev_get[-1]["id"] - query_params = {} - - if request.query_params.get("per_page"): - query_params["per_page"] = per_page - - result["headers"] = { - "link-next": reverse( - "api-1-revision-log", - url_args={"sha1_git": new_last_sha1}, - query_params=query_params, - request=request, - ) - } - - else: - rev_backward = rev_get - - if not prev_sha1s: # no nav breadcrumbs, so we're done - revisions = rev_backward - - else: - rev_forward_ids = prev_sha1s.split("/") - rev_forward = api_lookup( - service.lookup_revision_multiple, - rev_forward_ids, - notfound_msg=error_msg, - enrich_fn=utils.enrich_revision, - request=request, - ) - revisions = rev_forward + rev_backward - - result.update({"results": revisions}) - return result + return {"results": revisions} diff --git a/swh/web/tests/api/views/test_revision.py b/swh/web/tests/api/views/test_revision.py index 7ee31f2f..e3a7747b 100644 --- a/swh/web/tests/api/views/test_revision.py +++ b/swh/web/tests/api/views/test_revision.py @@ -1,264 +1,222 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from hypothesis import given from swh.web.api.utils import enrich_revision from swh.web.common.exc import NotFoundExc from swh.web.common.utils import reverse from swh.web.tests.data import random_sha1 from swh.web.tests.strategies import revision @given(revision()) def test_api_revision(api_client, archive_data, revision): url = reverse("api-1-revision", url_args={"sha1_git": revision}) rv = api_client.get(url) 
expected_revision = archive_data.revision_get(revision) enrich_revision(expected_revision, rv.wsgi_request) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == expected_revision def test_api_revision_not_found(api_client): unknown_revision_ = random_sha1() url = reverse("api-1-revision", url_args={"sha1_git": unknown_revision_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "Revision with sha1_git %s not found." % unknown_revision_, } @given(revision()) def test_api_revision_raw_ok(api_client, archive_data, revision): url = reverse("api-1-revision-raw-message", url_args={"sha1_git": revision}) rv = api_client.get(url) expected_message = archive_data.revision_get(revision)["message"] assert rv.status_code == 200 assert rv["Content-Type"] == "application/octet-stream" assert rv.content == expected_message.encode() def test_api_revision_raw_ko_no_rev(api_client): unknown_revision_ = random_sha1() url = reverse( "api-1-revision-raw-message", url_args={"sha1_git": unknown_revision_} ) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "Revision with sha1_git %s not found." 
% unknown_revision_, } @given(revision()) def test_api_revision_log(api_client, archive_data, revision): - per_page = 10 + limit = 10 url = reverse( "api-1-revision-log", url_args={"sha1_git": revision}, - query_params={"per_page": per_page}, + query_params={"limit": limit}, ) rv = api_client.get(url) - expected_log = archive_data.revision_log(revision, limit=per_page + 1) + expected_log = archive_data.revision_log(revision, limit=limit) expected_log = list( map(enrich_revision, expected_log, [rv.wsgi_request] * len(expected_log)) ) - has_next = len(expected_log) > per_page - assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" - assert rv.data == (expected_log[:-1] if has_next else expected_log) - - if has_next: - assert "Link" in rv - next_log_url = rv.wsgi_request.build_absolute_uri( - reverse( - "api-1-revision-log", - url_args={"sha1_git": expected_log[-1]["id"]}, - query_params={"per_page": per_page}, - ) - ) - assert next_log_url in rv["Link"] + assert rv.data == expected_log def test_api_revision_log_not_found(api_client): unknown_revision_ = random_sha1() url = reverse("api-1-revision-log", url_args={"sha1_git": unknown_revision_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "Revision with sha1_git %s not found." 
% unknown_revision_, } assert not rv.has_header("Link") -@given(revision()) -def test_api_revision_log_context(api_client, archive_data, revision): - revisions = archive_data.revision_log(revision, limit=4) - - prev_rev = revisions[0]["id"] - rev = revisions[-1]["id"] - - per_page = 10 - - url = reverse( - "api-1-revision-log", - url_args={"sha1_git": rev, "prev_sha1s": prev_rev}, - query_params={"per_page": per_page}, - ) - - rv = api_client.get(url) - - expected_log = archive_data.revision_log(rev, limit=per_page) - prev_revision = archive_data.revision_get(prev_rev) - expected_log.insert(0, prev_revision) - expected_log = list( - map(enrich_revision, expected_log, [rv.wsgi_request] * len(expected_log)) - ) - - assert rv.status_code == 200, rv.data - assert rv["Content-Type"] == "application/json" - assert rv.data == expected_log - - def test_api_revision_directory_ko_not_found(api_client, mocker): mock_rev_dir = mocker.patch("swh.web.api.views.revision._revision_directory_by") mock_rev_dir.side_effect = NotFoundExc("Not found") rv = api_client.get("/api/1/revision/999/directory/some/path/to/dir/") assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == {"exception": "NotFoundExc", "reason": "Not found"} mock_rev_dir.assert_called_once_with( {"sha1_git": "999"}, "some/path/to/dir", "/api/1/revision/999/directory/some/path/to/dir/", with_data=False, ) def test_api_revision_directory_ok_returns_dir_entries(api_client, mocker): mock_rev_dir = mocker.patch("swh.web.api.views.revision._revision_directory_by") stub_dir = { "type": "dir", "revision": "999", "content": [ { "sha1_git": "789", "type": "file", "target": "101", "target_url": "/api/1/content/sha1_git:101/", "name": "somefile", "file_url": "/api/1/revision/999/directory/some/path/" "somefile/", }, { "sha1_git": "123", "type": "dir", "target": "456", "target_url": "/api/1/directory/456/", "name": "to-subdir", "dir_url": "/api/1/revision/999/directory/some/path/" 
"to-subdir/", }, ], } mock_rev_dir.return_value = stub_dir rv = api_client.get("/api/1/revision/999/directory/some/path/") stub_dir["content"][0]["target_url"] = rv.wsgi_request.build_absolute_uri( stub_dir["content"][0]["target_url"] ) stub_dir["content"][0]["file_url"] = rv.wsgi_request.build_absolute_uri( stub_dir["content"][0]["file_url"] ) stub_dir["content"][1]["target_url"] = rv.wsgi_request.build_absolute_uri( stub_dir["content"][1]["target_url"] ) stub_dir["content"][1]["dir_url"] = rv.wsgi_request.build_absolute_uri( stub_dir["content"][1]["dir_url"] ) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == stub_dir mock_rev_dir.assert_called_once_with( {"sha1_git": "999"}, "some/path", "/api/1/revision/999/directory/some/path/", with_data=False, ) def test_api_revision_directory_ok_returns_content(api_client, mocker): mock_rev_dir = mocker.patch("swh.web.api.views.revision._revision_directory_by") stub_content = { "type": "file", "revision": "999", "content": { "sha1_git": "789", "sha1": "101", "data_url": "/api/1/content/101/raw/", }, } mock_rev_dir.return_value = stub_content url = "/api/1/revision/666/directory/some/other/path/" rv = api_client.get(url) stub_content["content"]["data_url"] = rv.wsgi_request.build_absolute_uri( stub_content["content"]["data_url"] ) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == stub_content mock_rev_dir.assert_called_once_with( {"sha1_git": "666"}, "some/other/path", url, with_data=False ) @given(revision()) def test_api_revision_uppercase(api_client, revision): url = reverse( "api-1-revision-uppercase-checksum", url_args={"sha1_git": revision.upper()} ) resp = api_client.get(url) assert resp.status_code == 302 redirect_url = reverse("api-1-revision", url_args={"sha1_git": revision}) assert resp["location"] == redirect_url