# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from django.http import HttpResponse

from swh.model.git_objects import (
    directory_git_object,
    revision_git_object,
    release_git_object,
    snapshot_git_object,
)
from swh.model.model import Directory, Revision, Release, Snapshot
from swh.model.swhids import CoreSWHID, ObjectType
from swh.web.api.apidoc import api_doc, format_docstring
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
from swh.web.common import archive
from swh.web.common.exc import BadInputExc, NotFoundExc


# The named group must be called ``swhid_r`` so that the URL resolver passes
# the captured text as the view's ``swhid_r`` keyword argument.
# NOTE(review): ``{SWHID_RE}`` sits inside a raw (non-f) string and no
# ``SWHID_RE`` name is imported in this module, so it is not interpolated
# here -- confirm where the SWHID pattern is meant to come from and turn
# this into an f-string (or inline the pattern) accordingly.
@api_route(
    r"/raw/(?P<swhid_r>{SWHID_RE})/",
    "api-1-raw-object",
)
@api_doc("/raw")
@format_docstring()
def api_raw_object(_request, swhid_r):
    """
    .. http:get:: /api/1/raw/(swhid)/

        Get the object corresponding to the SWHID in raw form.

        This endpoint exposes the internal representation (see
        :func:`swh.model.git_objects.*_git_object` in our data
        model module for details), and so can be used to fetch a binary
        blob which hashes to the same identifier.

        :param string swhid: the object's SWHID

        :resheader Content-Type: application/octet-stream

        :statuscode 200: no error
        :statuscode 400: an invalid SWHID has been provided
        :statuscode 404: the requested object can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`raw/swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a`
    """
    # NOTE(review): a malformed identifier makes ``from_string`` raise its own
    # error type rather than BadInputExc; confirm the API error handler maps
    # it to the documented 400 status.
    swhid = CoreSWHID.from_string(swhid_r)
    object_id = swhid.object_id
    object_type = swhid.object_type

    # NOTE(review): the directory/revision/release/snapshot branches assume
    # the ``archive.lookup_*`` helpers accept the binary ``object_id`` and
    # return dicts that ``<Model>.from_dict`` understands -- verify against
    # ``swh.web.common.archive``.
    if object_type == ObjectType.CONTENT:
        # No swh.model.git_objects.content_git_object yet: serve the stored
        # bytes as they are.  The lookup key is built from the hexadecimal
        # object id, not from the whole SWHID string.
        content_raw = archive.lookup_content_raw(f"sha1_git:{object_id.hex()}")
        if not content_raw:
            raise NotFoundExc(f"Content {swhid_r} is not found.")
        results = content_raw["data"]

    elif object_type == ObjectType.DIRECTORY:
        data = api_lookup(
            archive.lookup_directory,
            object_id,
            notfound_msg=f"Directory with id {swhid_r} not found.",
        )
        results = directory_git_object(Directory.from_dict(data))

    elif object_type == ObjectType.REVISION:
        data = api_lookup(
            archive.lookup_revision,
            object_id,
            notfound_msg=f"Revision with id {swhid_r} not found.",
        )
        results = revision_git_object(Revision.from_dict(data))

    elif object_type == ObjectType.RELEASE:
        data = api_lookup(
            archive.lookup_release,
            object_id,
            notfound_msg=f"Release with id {swhid_r} not found.",
        )
        results = release_git_object(Release.from_dict(data))

    elif object_type == ObjectType.SNAPSHOT:
        # Snapshots go through the snapshot lookup (this branch previously
        # called the directory lookup by copy-paste mistake).
        data = api_lookup(
            archive.lookup_snapshot,
            object_id,
            branch_name_exclude_prefix=None,
            notfound_msg=f"Snapshot with id {swhid_r} not found.",
        )
        results = snapshot_git_object(Snapshot.from_dict(data))

    else:
        raise BadInputExc("Imposible: checked all object types")

    response = HttpResponse(results, content_type="application/octet-stream")
    # Derive the download name from the textual SWHID: the parsed CoreSWHID
    # object is not a string, so ``:`` must be replaced on its string form.
    filename = str(swhid).replace(":", "_")
    response["Content-disposition"] = f"attachment;filename={filename}_raw"

    return response