diff --git a/swh/core/tarball.py b/swh/core/tarball.py
--- a/swh/core/tarball.py
+++ b/swh/core/tarball.py
@@ -166,6 +166,13 @@
             _unpack_zip(tarpath, dest)
         else:
             raise
+    except NotADirectoryError:
+        if format and "tar" in format:
+            # some old tarballs might fail to be unpacked by shutil.unpack_archive,
+            # fallback using the tar command as last resort
+            _unpack_tar(tarpath, dest)
+        else:
+            raise
 
     normalize_permissions(dest)
 
diff --git a/swh/core/tests/test_tarball.py b/swh/core/tests/test_tarball.py
--- a/swh/core/tests/test_tarball.py
+++ b/swh/core/tests/test_tarball.py
@@ -1,12 +1,14 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import hashlib
+import io
 import os
 import shutil
 import stat
+import tarfile
 
 import pytest
 
@@ -209,21 +211,48 @@
     assert file.stat().st_mode == 0o100644
 
 
-def test_unpcompress_zip_imploded(tmp_path, datadir):
-    """Unpack a zip archive with compression type 6 (implode),
-    not supported by python zipfile module.
-
-    """
-    filename = "msk316src.zip"
-    zippath = os.path.join(datadir, "archives", filename)
-
-    assert os.path.exists(zippath)
+def uncompress_archive_test(archive_path, tmp_path):
+    """Uncompress ``archive_path`` in a sub-directory of ``tmp_path`` and
+    check that the extraction directory is not empty.
+    """
+    assert os.path.exists(archive_path)
 
-    extract_dir = os.path.join(tmp_path, filename)
+    extract_dir = os.path.join(tmp_path, "unpack", os.path.basename(archive_path))
+    tarball.uncompress(archive_path, extract_dir)
+    assert os.listdir(extract_dir)
 
-    tarball.uncompress(zippath, extract_dir)
 
-    assert len(os.listdir(extract_dir)) > 0
+def test_uncompress_tar_fallback_when_unpack_archive_failed(tmp_path):
+    """shutil.unpack_archive can fail unpacking a tarball with missing
+    directory bit set on folder contained in it while the tar command
+    succeeds to perform the same task so check the fallback of using
+    tar command works for such edge case.
+    """
+    archive_path = os.path.join(tmp_path, "repro.tar.gz")
+    payload = b"hello world"
+    # the context manager closes the archive even if adding a member fails
+    with tarfile.open(archive_path, "w:gz") as tf:
+        ti = tarfile.TarInfo("dir")
+        ti.mode = 0o777
+        ti.type = tarfile.DIRTYPE
+        tf.addfile(ti)
+        ti = tarfile.TarInfo("dir/file")
+        # addfile() copies exactly ti.size bytes from the file object and
+        # TarInfo.size defaults to 0, so the size must be set explicitly
+        ti.size = len(payload)
+        tf.addfile(ti, io.BytesIO(payload))
+    # NOTE(review): confirm this archive really makes shutil.unpack_archive
+    # raise NotADirectoryError, otherwise the tar fallback is not exercised
+    uncompress_archive_test(archive_path, tmp_path)
+
+
+def test_uncompress_zip_imploded(tmp_path, datadir):
+    """Unpack a zip archive with compression type 6 (implode),
+    not supported by python zipfile module.
+    """
+    archive_path = os.path.join(datadir, "archives", "msk316src.zip")
+    uncompress_archive_test(archive_path, tmp_path)
 
 
 def test_uncompress_upper_archive_extension(tmp_path, datadir):