Page MenuHomeSoftware Heritage

D8108.id29280.diff
No OneTemporary

D8108.id29280.diff

diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py
--- a/swh/web/api/views/content.py
+++ b/swh/web/api/views/content.py
@@ -4,9 +4,10 @@
# See top-level LICENSE file for more information
import functools
+import io
from typing import Optional
-from django.http import HttpResponse
+from django.http import FileResponse
from rest_framework.request import Request
from swh.web.api import utils
@@ -183,6 +184,31 @@
)
class _ROBytesIO(io.BufferedIOBase):
    """Read-only, BytesIO-like stream over an existing bytes-like object.

    Unlike ``io.BytesIO``, the wrapped buffer is kept by reference rather than
    copied into the stream; only the slices handed out by :meth:`read` are
    materialized.

    Args:
        initial_bytes: the bytes-like object to expose as a stream.
    """

    def __init__(self, initial_bytes):
        self._offset = 0
        self._buffer = initial_bytes

    def readable(self):
        """Report that this stream supports reading."""
        return True

    def read(self, size=-1):
        """Return up to ``size`` bytes from the current position.

        Args:
            size: maximum number of bytes to return; ``None`` or a negative
                value means "everything up to the end of the buffer".

        Returns:
            The requested slice of the buffer, empty once the end is reached.
        """
        start = self._offset
        total = len(self._buffer)
        if size is None or size < 0:
            end = total
        else:
            # Advance by the requested size only, and never past the end, so
            # that successive reads return consecutive chunks and then b"".
            end = min(start + size, total)
        self._offset = end
        return self._buffer[start:end]

    def read1(self, size=-1):
        """Same as :meth:`read`; there is no underlying raw stream."""
        return self.read(size)

    def detach(self, b=None):
        """Always raise ``io.UnsupportedOperation``: there is no raw stream.

        ``b`` is ignored; it is only accepted for backward compatibility with
        the previous signature.
        """
        # io.BufferedIOBase (C implementation) has no ``_unsupported`` helper,
        # so raise the standard exception directly.
        raise io.UnsupportedOperation("detach")

    def write(self, b):
        """Always raise ``io.UnsupportedOperation``: the stream is read-only."""
        raise io.UnsupportedOperation("write")
+
@api_route(
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/raw/",
"api-1-content-raw",
@@ -216,10 +242,6 @@
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/`
"""
-
- def generate(content):
- yield content["data"]
-
content_raw = archive.lookup_content_raw(q)
if not content_raw:
raise NotFoundExc("Content %s is not found." % q)
@@ -228,10 +250,13 @@
if not filename:
filename = "content_%s_raw" % q.replace(":", "_")
- response = HttpResponse(
- generate(content_raw), content_type="application/octet-stream"
+ response = FileResponse(
+ _ROBytesIO(content_raw["data"]),
+ filename=filename,
+ content_type="application/octet-stream",
+ as_attachment=True,
)
- response["Content-disposition"] = "attachment; filename=%s" % filename
+ response["Content-Length"] = len(content_raw["data"])
return response
diff --git a/swh/web/tests/api/views/test_content.py b/swh/web/tests/api/views/test_content.py
--- a/swh/web/tests/api/views/test_content.py
+++ b/swh/web/tests/api/views/test_content.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2019 The Software Heritage developers
+# Copyright (C) 2015-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -236,10 +236,11 @@
assert rv["Content-Type"] == "application/octet-stream"
assert (
rv["Content-disposition"]
- == "attachment; filename=content_sha1_%s_raw" % content["sha1"]
+ == 'attachment; filename="content_sha1_%s_raw"' % content["sha1"]
)
expected_data = archive_data.content_get_data(content["sha1"])
- assert rv.content == expected_data["data"]
+ assert b"".join(rv.streaming_content) == expected_data["data"]
+ assert int(rv["Content-Length"]) == len(expected_data["data"])
def test_api_content_raw_text_with_filename(api_client, archive_data, content):
@@ -249,10 +250,51 @@
query_params={"filename": "filename.txt"},
)
rv = check_http_get_response(api_client, url, status_code=200)
- assert rv["Content-disposition"] == "attachment; filename=filename.txt"
+ assert rv["Content-disposition"] == 'attachment; filename="filename.txt"'
assert rv["Content-Type"] == "application/octet-stream"
expected_data = archive_data.content_get_data(content["sha1"])
- assert rv.content == expected_data["data"]
+ assert b"".join(rv.streaming_content) == expected_data["data"]
+ assert int(rv["Content-Length"]) == len(expected_data["data"])
+
+
@pytest.mark.parametrize(
    "encoded,expected",
    [
        # From https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.2
        (
            "%c2%a3%20and%20%e2%82%ac%20rates.txt",
            "%C2%A3%20and%20%E2%82%AC%20rates.txt",
        ),
        ("%A3%20rates.txt", "%EF%BF%BD%20rates.txt"),
        # found in the wild
        (
            "Th%C3%A9orie%20de%20sant%C3%A9-aide-justice.pdf",
            "Th%C3%A9orie%20de%20sant%C3%A9-aide-justice.pdf",
        ),
    ],
)
def test_api_content_raw_text_with_nonascii_filename(
    api_client, archive_data, content, encoded, expected
):
    """Check the raw-content response for percent-encoded filenames.

    The ``filename`` query parameter is passed already percent-encoded; the
    Content-disposition header must be pure ASCII and carry the name in the
    ``filename*=utf-8''...`` form, alongside the usual content type, body
    and Content-Length.
    """
    sha1 = content["sha1"]
    base_url = reverse("api-1-content-raw", url_args={"q": "sha1:%s" % sha1})
    # The query string is appended by hand so ``encoded`` reaches the server
    # exactly as given.
    full_url = f"{base_url}?filename={encoded}"
    rv = check_http_get_response(api_client, full_url, status_code=200)

    disposition = rv["Content-disposition"]

    # technically, ISO8859-1 is allowed too
    assert disposition.isascii(), disposition

    wanted_disposition = f"attachment; filename*=utf-8''{expected}"
    assert disposition == wanted_disposition, disposition

    assert rv["Content-Type"] == "application/octet-stream"

    raw_bytes = archive_data.content_get_data(sha1)["data"]
    assert b"".join(rv.streaming_content) == raw_bytes
    assert int(rv["Content-Length"]) == len(raw_bytes)
def test_api_check_content_known(api_client, content):

File Metadata

Mime Type
text/plain
Expires
Fri, Jun 20, 7:51 PM (3 w, 17 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227735

Event Timeline