# ---- pristine_zip/delta_to_zipball.py ----

# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import os
import subprocess
import sys
import tempfile

from . import compress


def genzip(checkout_dir: str, delta_path: str, zipball_path: str) -> None:
    """Rebuild a zipball from a checkout directory and a delta archive.

    A reference zipball is generated from ``checkout_dir`` inside a temporary
    working directory, then the delta archive at ``delta_path`` is applied to
    it to produce ``zipball_path``.
    """
    with tempfile.TemporaryDirectory(prefix="pristine-zip-genzip") as work_dir:
        reference_zipball_path = os.path.join(work_dir, "reference.zip")

        # generate reference zipball
        compress.compress(checkout_dir, reference_zipball_path)

        _apply_delta(reference_zipball_path, zipball_path, work_dir, delta_path)


def _apply_delta(
    reference_zipball_path: str, zipball_path: str, work_dir: str, delta_path: str,
) -> None:
    """Unpack the delta archive into ``work_dir`` and apply it.

    The archive must contain a ``type`` member equal to ``zip``, a
    ``reference_md5sum`` member holding the digest of the reference zipball
    the delta was generated against, and the xdelta3 ``delta`` itself.

    Raises:
        subprocess.CalledProcessError: if ``tar``, ``md5sum`` or ``xdelta3``
            exits with a non-zero status.
        AssertionError: if the archive type is not ``zip`` or the ``delta``
            member is missing.
        SystemExit: with status 1 if the reference zipball digest differs
            from the one recorded in the archive.
    """
    # tar runs inside work_dir, so a relative delta_path must first be anchored
    # to the caller's working directory; "-" is passed through untouched.
    archive = delta_path if delta_path == "-" else os.path.join(
        os.getcwd(), delta_path
    )
    subprocess.run(["tar", "--extract", "-f", archive], cwd=work_dir, check=True)

    with open(os.path.join(work_dir, "type"), "rb") as fd:
        type_ = fd.read().decode().strip()
    if type_ != "zip":
        # Raised explicitly (rather than via `assert`) so the check still runs
        # under `python -O`; the exception type is unchanged.
        raise AssertionError(
            f"Unknown zipball type {type_}. Are you "
            "extracting a delta from pristine-tar instead of pristine-zip?"
        )

    with open(os.path.join(work_dir, "reference_md5sum"), "rb") as fd:
        expected_md5sum = fd.read().decode().strip()

    proc = subprocess.run(
        ["md5sum", reference_zipball_path], capture_output=True, check=True
    )
    # md5sum prints "<digest>  <path>". Only the digest is stored in the delta
    # archive, so only the digest may be compared here.
    actual_md5sum = proc.stdout.split(b" ", 1)[0].decode()

    if actual_md5sum != expected_md5sum:
        print(
            "md5sum mismatch between reference zipballs. "
            "This is a bug, please report it along with the original zipball "
            "and the version number of pristine-zip.",
            file=sys.stderr,
        )
        sys.exit(1)

    xdelta3_path = os.path.join(work_dir, "delta")
    if not os.path.isfile(xdelta3_path):
        raise AssertionError("Missing 'delta' file in delta archive.")

    _apply_xdelta3(reference_zipball_path, zipball_path, xdelta3_path)


def _apply_xdelta3(
    reference_zipball_path: str, zipball_path: str, xdelta3_path: str
) -> None:
    """Run ``xdelta3 -d`` with the reference zipball as source.

    Raises:
        subprocess.CalledProcessError: if ``xdelta3`` exits with a non-zero
            status.
    """
    subprocess.run(
        ["xdelta3", "-d", "-s", reference_zipball_path, xdelta3_path, zipball_path],
        check=True,
    )


# ---- pristine_zip/zipball_to_delta.py ----

# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import os
import subprocess
import tempfile

from . import compress


def gendelta(zipball_path: str, delta_path: str) -> None:
    """Generate the delta archive for an upstream zipball.

    The upstream zipball is unpacked and re-zipped into a reference zipball
    inside a temporary working directory; the difference between the two is
    then written, with its metadata, to ``delta_path``.
    """
    with tempfile.TemporaryDirectory(prefix="pristine-zip-gendelta") as work_dir:
        reference_zipball_path = os.path.join(work_dir, "reference.zip")

        generate_reference_zipball_from_zipball(
            zipball_path, reference_zipball_path, work_dir
        )

        _generate_delta(zipball_path, reference_zipball_path, work_dir, delta_path)


def _generate_delta(
    upstream_zipball_path: str,
    reference_zipball_path: str,
    work_dir: str,
    delta_path: str,
) -> None:
    """Build the delta archive from the upstream and reference zipballs.

    Writes three members into ``work_dir`` (``delta``, ``reference_md5sum``
    and ``type``) and packs them with ``tar`` into ``delta_path``.

    Raises:
        subprocess.CalledProcessError: if ``xdelta3``, ``md5sum`` or ``tar``
            exits with a non-zero status.
    """
    _generate_xdelta3(
        upstream_zipball_path, reference_zipball_path, os.path.join(work_dir, "delta")
    )

    with open(os.path.join(work_dir, "type"), "wb") as fd:
        fd.write(b"zip\n")

    proc = subprocess.run(
        ["md5sum", reference_zipball_path], capture_output=True, check=True
    )
    # md5sum prints "<digest>  <path>"; only the digest is recorded.
    md5sum = proc.stdout.split(b" ", 1)[0]

    with open(os.path.join(work_dir, "reference_md5sum"), "wb") as fd:
        fd.write(md5sum + b"\n")

    files = ["delta", "reference_md5sum", "type"]

    # Make the timestamps in the delta tarball deterministic
    for name in files:
        os.utime(os.path.join(work_dir, name), times=(0, 0))

    # tar runs inside work_dir, so a relative delta_path must first be anchored
    # to the caller's working directory (otherwise the archive would be created
    # inside the temporary directory); "-" is passed through untouched.
    archive = delta_path if delta_path == "-" else os.path.join(
        os.getcwd(), delta_path
    )

    subprocess.run(
        [
            "tar",
            # make entries in the delta tarball deterministic:
            "--owner",
            "0",
            "--group",
            "0",
            "--numeric-owner",
            "--mode",
            "644",
            # generic options:
            "--create",
            # NOTE(review): with GNU tar, --compress selects the `compress`
            # program (-Z), not gzip -- confirm this is intended.
            "--compress",
            "-f",
            archive,
            *files,
        ],
        cwd=work_dir,
        check=True,
    )


def _generate_xdelta3(
    upstream_zipball_path: str, reference_zipball_path: str, delta_path: str
) -> None:
    """Generates the xdelta3 difference between a reference zipball and the
    original one.

    Raises:
        subprocess.CalledProcessError: if ``xdelta3`` exits with a non-zero
            status.
    """
    subprocess.run(
        [
            "xdelta3",
            "-e",
            "-s",
            reference_zipball_path,
            upstream_zipball_path,
            delta_path,
        ],
        check=True,
    )


def generate_reference_zipball_from_zipball(
    upstream_zipball_path: str, reference_zipball_path: str, work_dir: str
) -> None:
    """Unzips an upstream zipball and rezips it in a reference zipball.

    The upstream zipball is extracted into a new ``checkout`` directory under
    ``work_dir``, which is then compressed into ``reference_zipball_path``.

    Raises:
        subprocess.CalledProcessError: if ``unzip`` exits with a non-zero
            status.
    """
    checkout_dir = os.path.join(work_dir, "checkout")
    os.mkdir(checkout_dir)

    # unzip runs inside checkout_dir, so a relative zipball path must first be
    # anchored to the caller's working directory.
    subprocess.run(
        ["unzip", os.path.join(os.getcwd(), upstream_zipball_path)],
        cwd=checkout_dir,
        check=True,
    )

    compress.compress(checkout_dir, reference_zipball_path)