diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -1015,7 +1015,7 @@ if content_type == "swh/generator": with content as path: return FileResponse( - open(path, "rb"), status=status, content_type="application/zip" + open(path, "rb"), status=status, content_type="application/tar" ) if content_type == "application/json": return HttpResponse( diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -49,7 +49,7 @@ # Aggregate into one big tarball the multiple smaller ones temp_tarpath = shutil.make_archive( - aggregated_tarball_rootdir, "zip", aggregated_tarball_rootdir + aggregated_tarball_rootdir, "tar", aggregated_tarball_rootdir ) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) diff --git a/swh/deposit/tests/api/test_deposit_private_read_archive.py b/swh/deposit/tests/api/test_deposit_private_read_archive.py --- a/swh/deposit/tests/api/test_deposit_private_read_archive.py +++ b/swh/deposit/tests/api/test_deposit_private_read_archive.py @@ -1,14 +1,15 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import io -import zipfile +from os.path import exists, join +import tarfile from django.urls import reverse from rest_framework import status +from swh.deposit.api.private.deposit_read import aggregate_tarballs from swh.deposit.config import EM_IRI, PRIVATE_GET_RAW_CONTENT from swh.deposit.tests.common import create_arborescence_archive @@ -24,25 +25,33 @@ def test_access_to_existing_deposit_with_one_archive( - authenticated_client, deposit_collection, complete_deposit, sample_archive + authenticated_client, + deposit_collection, + complete_deposit, + sample_archive, + tmp_path, ): """Access to deposit should stream a 200 response with its raw content """ deposit = complete_deposit - for url in private_get_raw_url_endpoints(deposit_collection, deposit): - r = authenticated_client.get(url) + for i, url in enumerate(private_get_raw_url_endpoints(deposit_collection, deposit)): + response = authenticated_client.get(url) - assert r.status_code == status.HTTP_200_OK - assert r._headers["content-type"][1] == "application/zip" + assert response.status_code == status.HTTP_200_OK + assert response._headers["content-type"][1] == "application/tar" - # read the stream - data = b"".join(r.streaming_content) - # extract the file from the zip - zfile = zipfile.ZipFile(io.BytesIO(data)) - assert zfile.namelist() == ["file1"] - assert zfile.open("file1").read() == b"some content in file" + # write the response stream in a temporary archive + archive_path = join(tmp_path, f"archive_{i}.tar") + with open(archive_path, "wb") as f: + for chunk in response.streaming_content: + f.write(chunk) + + # to check its properties are correct + tfile = tarfile.open(archive_path) + assert set(tfile.getnames()) == {".", "./file1"} + assert tfile.extractfile("./file1").read() == b"some content in file" def test_access_to_existing_deposit_with_multiple_archives( @@ -72,15 +81,26 @@ ) assert response.status_code == status.HTTP_201_CREATED - for url in private_get_raw_url_endpoints(deposit_collection, deposit): - r = authenticated_client.get(url) - - assert r.status_code == status.HTTP_200_OK - assert r._headers["content-type"][1] == "application/zip" - # read the stream - data = b"".join(r.streaming_content) - # extract the file from the zip - zfile = zipfile.ZipFile(io.BytesIO(data)) - assert set(zfile.namelist()) == {"file1", "file2"} - assert zfile.open("file1").read() == b"some content in file" - assert zfile.open("file2").read() == b"some other content in file" + for i, url in enumerate(private_get_raw_url_endpoints(deposit_collection, deposit)): + response = authenticated_client.get(url) + + assert response.status_code == status.HTTP_200_OK + assert response._headers["content-type"][1] == "application/tar" + # write the response stream in a temporary archive + archive_path = join(tmp_path, f"archive_{i}.tar") + with open(archive_path, "wb") as f: + for chunk in response.streaming_content: + f.write(chunk) + + # to check its properties are correct + tfile = tarfile.open(archive_path) + assert set(tfile.getnames()) == {".", "./file1", "./file2"} + assert tfile.extractfile("./file1").read() == b"some content in file" + assert tfile.extractfile("./file2").read() == b"some other content in file" + + +def test_aggregate_tarballs_with_strange_archive(datadir, tmp_path): + archive = join(datadir, "archives", "single-artifact-package.tar.gz") + + with aggregate_tarballs(tmp_path, [archive]) as tarball_path: + assert exists(tarball_path) diff --git a/swh/deposit/tests/data/archives/single-artifact-package.tar.gz b/swh/deposit/tests/data/archives/single-artifact-package.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@