def _unpack_zst(zstpath: str, extract_dir: str) -> str:
    """Unpack zst files unsupported by the standard python library. Example
    include tar.zst

    The extraction is delegated to the external ``tar`` command, which is
    asked to use the ``zstd`` program to decompress the archive.

    This expects the `extract_dir` to exist.

    Args:
        zstpath: path to the zstd-compressed tarball to unpack
        extract_dir: directory the archive content is extracted into

    Raises:
        shutil.ReadError in case of issue uncompressing the archive, i.e. any
        exception raised while running ``tar`` (including a non-zero exit
        status). The original exception is chained as the cause.

    Returns:
        `extract_dir`, as received.

    """
    command = [
        "tar",
        # Per the GNU tar manual: treat the archive name as a local file even
        # if it contains a colon.
        "--force-local",
        # One argv element, no shell quoting: there is no shell between us
        # and tar, so quotes would be handed to tar literally.
        "--use-compress-program=zstd",
        "-xf",
        zstpath,
        "-C",
        extract_dir,
    ]
    try:
        run(command, check=True)
    except Exception as e:
        # Deliberately broad: callers of the unpack functions only expect
        # shutil.ReadError, so every failure is translated into one.
        raise shutil.ReadError(
            f"Unable to uncompress {zstpath} to {extract_dir}. Reason: {e}"
        ) from e
    return extract_dir
filename) + + tarball.uncompress(tarpath, extract_dir) + + assert len(os.listdir(extract_dir)) > 0 + + def test_register_new_archive_formats(prepare_shutil_state): """Registering new archive formats should be fine""" unpack_formats_v1 = [f[0] for f in shutil.get_unpack_formats()]