Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9124980
D8108.id29280.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D8108.id29280.diff
View Options
diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py
--- a/swh/web/api/views/content.py
+++ b/swh/web/api/views/content.py
@@ -4,9 +4,10 @@
# See top-level LICENSE file for more information
import functools
+import io
from typing import Optional
-from django.http import HttpResponse
+from django.http import FileResponse
from rest_framework.request import Request
from swh.web.api import utils
@@ -183,6 +184,31 @@
)
class _ROBytesIO(io.BufferedIOBase):
    """Like BytesIO, but read-only so it does not need to copy the buffer it contains

    The object keeps a reference to ``initial_bytes`` and hands out slices of
    it on each :meth:`read`, tracking the current position in ``_offset``.
    Write-like operations are rejected with :class:`io.UnsupportedOperation`.
    """

    def __init__(self, initial_bytes):
        """Wrap ``initial_bytes`` (a bytes-like, sliceable object) for reading."""
        self._offset = 0
        self._buffer = initial_bytes

    def readable(self):
        """Report that this stream supports :meth:`read`."""
        return True

    def read(self, size=-1):
        """Return up to ``size`` bytes from the current position.

        A negative or ``None`` size returns everything up to the end of the
        buffer. An empty result signals end of stream.
        """
        old_offset = self._offset
        end = len(self._buffer)
        if size is None or size < 0:
            self._offset = end
        else:
            # Advance by the requested amount only, clamped to the end of the
            # buffer so that reads past EOF do not keep growing the offset.
            self._offset = min(old_offset + size, end)
        return self._buffer[old_offset : self._offset]

    def read1(self, size=-1):
        """Same as :meth:`read`; there is no underlying raw stream to call."""
        return self.read(size)

    def detach(self, b=None):
        """Always raise :class:`io.UnsupportedOperation`.

        ``b`` is ignored; it is only accepted so that callers using the
        previous one-argument signature keep working.
        """
        # io.BufferedIOBase (C implementation) has no ``_unsupported`` helper,
        # so raise the exception directly.
        raise io.UnsupportedOperation("detach")

    def write(self, b):
        """Always raise :class:`io.UnsupportedOperation`: the stream is read-only."""
        raise io.UnsupportedOperation("write")
+
+
@api_route(
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/raw/",
"api-1-content-raw",
@@ -216,10 +242,6 @@
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/`
"""
-
- def generate(content):
- yield content["data"]
-
content_raw = archive.lookup_content_raw(q)
if not content_raw:
raise NotFoundExc("Content %s is not found." % q)
@@ -228,10 +250,13 @@
if not filename:
filename = "content_%s_raw" % q.replace(":", "_")
- response = HttpResponse(
- generate(content_raw), content_type="application/octet-stream"
+ response = FileResponse(
+ _ROBytesIO(content_raw["data"]),
+ filename=filename,
+ content_type="application/octet-stream",
+ as_attachment=True,
)
- response["Content-disposition"] = "attachment; filename=%s" % filename
+ response["Content-Length"] = len(content_raw["data"])
return response
diff --git a/swh/web/tests/api/views/test_content.py b/swh/web/tests/api/views/test_content.py
--- a/swh/web/tests/api/views/test_content.py
+++ b/swh/web/tests/api/views/test_content.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2019 The Software Heritage developers
+# Copyright (C) 2015-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -236,10 +236,11 @@
assert rv["Content-Type"] == "application/octet-stream"
assert (
rv["Content-disposition"]
- == "attachment; filename=content_sha1_%s_raw" % content["sha1"]
+ == 'attachment; filename="content_sha1_%s_raw"' % content["sha1"]
)
expected_data = archive_data.content_get_data(content["sha1"])
- assert rv.content == expected_data["data"]
+ assert b"".join(rv.streaming_content) == expected_data["data"]
+ assert int(rv["Content-Length"]) == len(expected_data["data"])
def test_api_content_raw_text_with_filename(api_client, archive_data, content):
@@ -249,10 +250,51 @@
query_params={"filename": "filename.txt"},
)
rv = check_http_get_response(api_client, url, status_code=200)
- assert rv["Content-disposition"] == "attachment; filename=filename.txt"
+ assert rv["Content-disposition"] == 'attachment; filename="filename.txt"'
assert rv["Content-Type"] == "application/octet-stream"
expected_data = archive_data.content_get_data(content["sha1"])
- assert rv.content == expected_data["data"]
+ assert b"".join(rv.streaming_content) == expected_data["data"]
+ assert int(rv["Content-Length"]) == len(expected_data["data"])
+
+
@pytest.mark.parametrize(
    "encoded,expected",
    [
        # Examples taken from
        # https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.2
        (
            "%c2%a3%20and%20%e2%82%ac%20rates.txt",
            "%C2%A3%20and%20%E2%82%AC%20rates.txt",
        ),
        ("%A3%20rates.txt", "%EF%BF%BD%20rates.txt"),
        # A filename that was found in the wild
        (
            "Th%C3%A9orie%20de%20sant%C3%A9-aide-justice.pdf",
            "Th%C3%A9orie%20de%20sant%C3%A9-aide-justice.pdf",
        ),
    ],
)
def test_api_content_raw_text_with_nonascii_filename(
    api_client, archive_data, content, encoded, expected
):
    """Check the raw-content response for percent-encoded ``filename`` values.

    For each ``encoded`` query value, the Content-disposition header must be
    pure ASCII and carry ``expected`` as its ``filename*`` parameter; the body
    and Content-Length must match the archived content.
    """
    sha1 = content["sha1"]
    base_url = reverse("api-1-content-raw", url_args={"q": "sha1:%s" % sha1})
    full_url = f"{base_url}?filename={encoded}"
    response = check_http_get_response(api_client, full_url, status_code=200)

    disposition = response["Content-disposition"]

    # technically, ISO8859-1 is allowed too
    assert disposition.isascii(), disposition

    wanted_disposition = f"attachment; filename*=utf-8''{expected}"
    assert disposition == wanted_disposition, disposition

    assert response["Content-Type"] == "application/octet-stream"

    archived = archive_data.content_get_data(sha1)
    body = b"".join(response.streaming_content)
    assert body == archived["data"]
    assert int(response["Content-Length"]) == len(archived["data"])
def test_api_check_content_known(api_client, content):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jun 20, 7:51 PM (3 w, 17 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227735
Attached To
D8108: Add support for non-ASCII filenames in api_content_raw (option 1: use FileResponse)
Event Timeline
Log In to Comment