diff --git a/swh/core/tarball.py b/swh/core/tarball.py
--- a/swh/core/tarball.py
+++ b/swh/core/tarball.py
@@ -64,6 +64,29 @@
 )
 
 
+def _unpack_jar(jarpath: str, extract_dir: str) -> str:
+    """Unpack jar files using standard Python module zipfile.
+
+    This expects the `extract_dir` to exist.
+
+    Raises:
+        shutil.ReadError in case of issue uncompressing the archive (jarpath
+        does not exist, is not a valid zip archive, etc...)
+
+    Returns:
+        full path to the uncompressed directory.
+
+    """
+    try:
+        with zipfile.ZipFile(jarpath) as jar:
+            jar.extractall(path=extract_dir)
+        return extract_dir
+    except Exception as e:
+        raise shutil.ReadError(
+            f"Unable to uncompress {jarpath} to {extract_dir}. Reason: {e}"
+        ) from e
+
+
 def register_new_archive_formats():
     """Register new archive formats to uncompress"""
     registered_formats = [f[0] for f in shutil.get_unpack_formats()]
@@ -80,6 +103,7 @@
     "application/gzip": "gztar",
     "application/x-lzip": "tar.lz",
     "application/zip": "zip",
+    "application/java-archive": "jar",
 }
 
 
@@ -192,7 +216,7 @@
 ADDITIONAL_ARCHIVE_FORMATS = [
     # name, extensions, function
     ("tar.Z|x", [".tar.Z", ".tar.x"], _unpack_tar),
-    ("jar", [".jar"], _unpack_zip),
+    ("jar", [".jar"], _unpack_jar),
     ("tbz2", [".tbz", "tbz2"], _unpack_tar),
     # FIXME: make this optional depending on the runtime lzip package install
     ("tar.lz", [".tar.lz"], _unpack_tar),
diff --git a/swh/core/tests/data/archives/hello.jar b/swh/core/tests/data/archives/hello.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@