diff --git a/swh/web/api/views/raw.py b/swh/web/api/views/raw.py
new file mode 100644
--- /dev/null
+++ b/swh/web/api/views/raw.py
@@ -0,0 +1,100 @@
+# Copyright (C) 2018-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from django.http import HttpResponse
+
+from swh.model import model
+from swh.model.exceptions import ValidationError
+from swh.model.git_objects import (
+    # content_git_object,
+    directory_git_object,
+    release_git_object,
+    revision_git_object,
+    snapshot_git_object,
+)
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.web.api.apidoc import api_doc, format_docstring
+from swh.web.api.apiurls import api_route
+from swh.web.api.views.utils import api_lookup
+from swh.web.common.archive import lookup_object
+from swh.web.common.exc import BadInputExc
+
+# For each object type that can be served, the model class rebuilding the
+# object from its dict form and the function producing its git representation.
+_GIT_SERIALIZERS = {
+    # ObjectType.CONTENT: (model.Content, content_git_object),
+    ObjectType.DIRECTORY: (model.Directory, directory_git_object),
+    ObjectType.REVISION: (model.Revision, revision_git_object),
+    ObjectType.RELEASE: (model.Release, release_git_object),
+    ObjectType.SNAPSHOT: (model.Snapshot, snapshot_git_object),
+}
+
+
+# The named group must match the view's keyword argument; the pattern is
+# spelled out because a raw string does not interpolate a "{SWHID_RE}" field.
+@api_route(
+    r"/raw/(?P<swhid_r>swh:1:[a-z]{3}:[0-9a-f]{40})/",
+    "api-1-raw-object",
+)
+@api_doc("/raw")
+@format_docstring()
+def api_raw_object(_request, swhid_r):
+    """
+    .. http:get:: /api/1/raw/(swhid)/
+
+        Get the object corresponding to the SWHID in raw form.
+
+        This endpoint exposes the internal representation (see
+        :func:`swh.model.git_objects.*_git_object` in our data
+        model module for details), and so can be used to fetch a binary
+        blob which hashes to the same identifier.
+
+        :param string swhid: the object's SWHID
+
+        :resheader Content-Type: application/octet-stream
+
+        :statuscode 200: no error
+        :statuscode 400: an invalid SWHID has been provided, or its object
+            type cannot be served in raw form
+        :statuscode 404: the requested object can not be found in the archive
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`raw/swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a`
+    """
+    try:
+        swhid = CoreSWHID.from_string(swhid_r)
+    except ValidationError as exc:
+        raise BadInputExc(f"Invalid SWHID {swhid_r}: {exc}") from exc
+
+    try:
+        model_cls, to_git_object = _GIT_SERIALIZERS[swhid.object_type]
+    except KeyError:
+        raise BadInputExc(
+            f"Objects of type {swhid.object_type.name.lower()} cannot be "
+            "served in raw form."
+        ) from None
+
+    data = api_lookup(
+        lookup_object,
+        swhid.object_type,
+        # CoreSWHID keeps the identifier as bytes; lookup_object wants hex.
+        swhid.object_id.hex(),
+        # NOTE(review): assumes api_lookup forwards this keyword to
+        # lookup_object -- confirm.
+        complete=True,
+        notfound_msg=f"Object with id {swhid_r} not found.",
+    )
+
+    results = to_git_object(model_cls.from_dict(data))
+
+    response = HttpResponse(results, content_type="application/octet-stream")
+    # swhid_r is the original string; the parsed CoreSWHID has no replace().
+    filename = swhid_r.replace(":", "_")
+    response["Content-disposition"] = f"attachment; filename={filename}_raw"
+
+    return response
diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py
--- a/swh/web/common/archive.py
+++ b/swh/web/common/archive.py
@@ -1372,7 +1372,9 @@
     return _RevisionsWalkerProxy(rev_walker_type, rev_start, *args, **kwargs)
 
 
-def lookup_object(object_type: ObjectType, object_id: str) -> Dict[str, Any]:
+def lookup_object(
+    object_type: ObjectType, object_id: str, *, complete: bool = False
+) -> Dict[str, Any]:
     """
     Utility function for looking up an object in the archive by its type
     and id.
@@ -1383,6 +1385,11 @@
         object_id (str): the *sha1_git* checksum identifier in
             hexadecimal form of the object to lookup
 
+        complete (bool): Whether to include full objects, rather than their
+            default truncations.
+
+            Currently just affects snapshots.
+
     Returns:
         Dict[str, Any]: A dictionary describing the object or a list of
         dictionary for the directory object type.
@@ -1401,7 +1408,10 @@
     elif object_type == ObjectType.REVISION:
         return lookup_revision(object_id)
     elif object_type == ObjectType.SNAPSHOT:
-        return lookup_snapshot(object_id)
+        if complete:
+            return lookup_snapshot(object_id, branch_name_exclude_prefix=None)
+        else:
+            return lookup_snapshot(object_id)
     else:
         raise ValueError(f"Unexpected object type variant: {object_type}")
 
diff --git a/swh/web/tests/api/views/test_raw.py b/swh/web/tests/api/views/test_raw.py
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/api/views/test_raw.py
@@ -0,0 +1,72 @@
+# Copyright (C) 2015-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import hashlib
+
+import pytest
+
+from swh.model.git_objects import (
+    directory_git_object,
+    release_git_object,
+    revision_git_object,
+    snapshot_git_object,
+)
+from swh.web.common.utils import reverse
+from swh.web.tests.conftest import ctags_json_missing, fossology_missing
+from swh.web.tests.data import random_content
+from swh.web.tests.utils import (
+    check_api_get_responses,
+    check_api_post_responses,
+    check_http_get_response,
+)
+
+
+# def test_api_raw_content_ko_not_found(api_client):
+#     unknown_content_ = random_content()
+#
+#     url = reverse(
+#         "api-1-raw-object", url_args={"q": "sha1_git:%s" % unknown_content_["sha1_git"]}
+#     )
+#     rv = check_api_get_responses(api_client, url, status_code=404)
+#     assert rv.data == {
+#         "exception": "NotFoundExc",
+#         "reason": "Content with sha1 checksum equals to %s not found!"
+#         % unknown_content_["sha1_git"],
+#     }
+
+
+# def test_api_raw_content(api_client, archive_data, content):
+#     object_id = content["sha1_git"]
+#     object_ty = "cnt"
+#     url = reverse(
+#         "api-1-raw-object", url_args={"q": f"swh:1:{object_ty}:{object_id}"}
+#     )
+#
+#     rv = check_http_get_response(api_client, url, status_code=200)
+#     assert rv["Content-Type"] == "application/octet-stream"
+#     assert (
+#         rv["Content-disposition"]
+#         == f"attachment; filename=swh_1_{object_ty}_{object_id}_raw"
+#     )
+#     expected_data = archive_data.content_get_data(object_id)
+#     assert rv.content == content_git_object(expected_data)
+
+
+def test_api_raw_directory(api_client, archive_data, directory):
+    """The raw form of a directory must hash to the directory's sha1_git."""
+    object_id = directory
+    object_ty = "dir"
+    swhid = f"swh:1:{object_ty}:{object_id}"
+    # The URL argument is named after the view's keyword argument.
+    url = reverse("api-1-raw-object", url_args={"swhid_r": swhid})
+
+    rv = check_http_get_response(api_client, url, status_code=200)
+    assert rv["Content-Type"] == "application/octet-stream"
+    assert (
+        rv["Content-disposition"]
+        == f"attachment; filename=swh_1_{object_ty}_{object_id}_raw"
+    )
+    # object_id is a hexadecimal string, so compare against the hex digest.
+    assert hashlib.sha1(rv.content).hexdigest() == object_id