Changeset View
Changeset View
Standalone View
Standalone View
swh/web/api/views/raw.py
# Copyright (C) 2022 The Software Heritage developers | # Copyright (C) 2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from django.http import HttpResponse | from django.http import HttpResponse | ||||
from rest_framework.exceptions import PermissionDenied | from rest_framework.exceptions import PermissionDenied | ||||
from rest_framework.request import Request | |||||
from swh.model import model | from swh.model import model | ||||
from swh.model.git_objects import ( | from swh.model.git_objects import ( | ||||
content_git_object, | content_git_object, | ||||
directory_git_object, | directory_git_object, | ||||
release_git_object, | release_git_object, | ||||
revision_git_object, | revision_git_object, | ||||
snapshot_git_object, | snapshot_git_object, | ||||
Show All 12 Lines | |||||
@api_route( | @api_route( | ||||
f"/raw/(?P<swhid>{SWHID_RE})/", | f"/raw/(?P<swhid>{SWHID_RE})/", | ||||
"api-1-raw-object", | "api-1-raw-object", | ||||
throttle_scope="swh_raw_object", | throttle_scope="swh_raw_object", | ||||
) | ) | ||||
@api_doc("/raw/") | @api_doc("/raw/") | ||||
@format_docstring() | @format_docstring() | ||||
def api_raw_object(request, swhid): | def api_raw_object(request: Request, swhid: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/raw/(swhid)/ | .. http:get:: /api/1/raw/(swhid)/ | ||||
Get the object corresponding to the SWHID in raw form. | Get the object corresponding to the SWHID in raw form. | ||||
This endpoint exposes the internal representation (see the | This endpoint exposes the internal representation (see the | ||||
``*_git_object`` functions in :mod:`swh.model.git_objects`), and | ``*_git_object`` functions in :mod:`swh.model.git_objects`), and | ||||
so can be used to fetch a binary blob which hashes to the same | so can be used to fetch a binary blob which hashes to the same | ||||
Show All 14 Lines | .. http:get:: /api/1/raw/(swhid)/ | ||||
.. parsed-literal:: | .. parsed-literal:: | ||||
:swh_web_api:`raw/swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a` | :swh_web_api:`raw/swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a` | ||||
""" | """ | ||||
if not (request.user.is_staff or request.user.has_perm(API_RAW_OBJECT_PERMISSION)): | if not (request.user.is_staff or request.user.has_perm(API_RAW_OBJECT_PERMISSION)): | ||||
raise PermissionDenied() | raise PermissionDenied() | ||||
swhid = CoreSWHID.from_string(swhid) | parsed_swhid = CoreSWHID.from_string(swhid) | ||||
object_id = swhid.object_id | object_id = parsed_swhid.object_id | ||||
object_type = swhid.object_type | object_type = parsed_swhid.object_type | ||||
def not_found(): | def not_found(): | ||||
return NotFoundExc(f"Object with id {swhid} not found.") | return NotFoundExc(f"Object with id {swhid} not found.") | ||||
if object_type == ObjectType.CONTENT: | if object_type == ObjectType.CONTENT: | ||||
results = archive.storage.content_find({"sha1_git": object_id}) | results = archive.storage.content_find({"sha1_git": object_id}) | ||||
if len(results) == 0: | if len(results) == 0: | ||||
raise not_found() | raise not_found() | ||||
cnt = results[0] | cnt = results[0] | ||||
# `cnt.with_data()` unfortunately doesn't seem to work. | # `cnt.with_data()` unfortunately doesn't seem to work. | ||||
if cnt.data is None: | if cnt.data is None: | ||||
d = cnt.to_dict() | d = cnt.to_dict() | ||||
d["data"] = archive.storage.content_get_data(cnt.sha1) | d["data"] = archive.storage.content_get_data(cnt.sha1) | ||||
cnt = model.Content.from_dict(d) | cnt = model.Content.from_dict(d) | ||||
assert cnt.data, f"Content {hash_to_hex(cnt.sha1)} ceased to exist" | assert cnt.data, f"Content {hash_to_hex(cnt.sha1)} ceased to exist" | ||||
result = content_git_object(cnt) | result = content_git_object(cnt) | ||||
elif object_type == ObjectType.DIRECTORY: | elif object_type == ObjectType.DIRECTORY: | ||||
result = directory_get(archive.storage, object_id) | dir_ = directory_get(archive.storage, object_id) | ||||
if result is None: | if dir_ is None: | ||||
raise not_found() | raise not_found() | ||||
result = directory_git_object(result) | result = directory_git_object(dir_) | ||||
vlorentz: naming the variable `dir_` (with a trailing underscore) avoids shadowing the built-in `dir` | |||||
elif object_type == ObjectType.REVISION: | elif object_type == ObjectType.REVISION: | ||||
result = archive.storage.revision_get([object_id], ignore_displayname=True)[0] | rev = archive.storage.revision_get([object_id], ignore_displayname=True)[0] | ||||
if result is None: | if rev is None: | ||||
raise not_found() | raise not_found() | ||||
result = revision_git_object(result) | result = revision_git_object(rev) | ||||
elif object_type == ObjectType.RELEASE: | elif object_type == ObjectType.RELEASE: | ||||
result = archive.storage.release_get([object_id], ignore_displayname=True)[0] | rel = archive.storage.release_get([object_id], ignore_displayname=True)[0] | ||||
if result is None: | if rel is None: | ||||
raise not_found() | raise not_found() | ||||
result = release_git_object(result) | result = release_git_object(rel) | ||||
elif object_type == ObjectType.SNAPSHOT: | elif object_type == ObjectType.SNAPSHOT: | ||||
result = snapshot_get_all_branches(archive.storage, object_id) | snp = snapshot_get_all_branches(archive.storage, object_id) | ||||
if result is None: | if snp is None: | ||||
raise not_found() | raise not_found() | ||||
result = snapshot_git_object(result) | result = snapshot_git_object(snp) | ||||
else: | else: | ||||
raise ValueError(f"Unexpected object type variant: {object_type}") | raise ValueError(f"Unexpected object type variant: {object_type}") | ||||
response = HttpResponse(result, content_type="application/octet-stream") | response = HttpResponse(result, content_type="application/octet-stream") | ||||
filename = str(swhid).replace(":", "_") + "_raw" | filename = swhid.replace(":", "_") + "_raw" | ||||
response["Content-disposition"] = f"attachment; filename={filename}" | response["Content-disposition"] = f"attachment; filename={filename}" | ||||
return response | return response |
naming the variable `dir_` (with a trailing underscore) avoids shadowing the built-in `dir`