# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Creates a ZIP file deterministically.

This can be used as a base reference for delta files."""

import os
import subprocess
from typing import Iterable, List


def reset_timestamps(entries: Iterable[str]) -> None:
    """Set the access and modification times of each path to the Unix epoch.

    Args:
        entries: paths (files or directories) whose timestamps are reset.

    Raises:
        OSError: if the timestamps of a path cannot be updated.
    """
    for entry in entries:
        os.utime(entry, (0, 0))


def walk(checkout_dir: str) -> List[str]:
    """List every directory and file found under ``checkout_dir``.

    Paths are returned relative to ``checkout_dir``, in ``os.walk`` order.
    The root directory itself is listed as the empty string.
    """
    entries = []
    for dirpath, _dirnames, filenames in os.walk(checkout_dir):
        assert dirpath.startswith(checkout_dir)
        # Make the directory path relative to the checkout root.
        dirpath = dirpath[len(checkout_dir) :].lstrip("/")
        entries.append(dirpath)
        paths = [os.path.join(dirpath, filename) for filename in filenames]
        entries.extend(paths)

    return entries


def compress(checkout_dir: str, target: str) -> None:
    """Generates a reference zipball for the given checked out directory.

    Any existing file at ``target`` is removed first. As a side effect, the
    access and modification times of every entry under ``checkout_dir`` are
    reset to the Unix epoch.

    Args:
        checkout_dir: directory whose content is archived.
        target: path of the ZIP file to create; a relative path is resolved
            against the current working directory of the calling process.

    Raises:
        subprocess.CalledProcessError: if ``zip`` exits with a non-zero status.
    """
    # zip runs with cwd=checkout_dir, so a relative target would name a
    # different file there than the one removed below. Resolve it once.
    target = os.path.abspath(target)

    try:
        os.remove(target)
    except FileNotFoundError:
        pass

    assert os.path.isdir(checkout_dir), checkout_dir

    entries_str = walk(checkout_dir)

    reset_timestamps(os.path.join(checkout_dir, entry) for entry in entries_str)

    # Encode *before* sorting; sorting on unicode changes across configurations.
    # os.fsencode returns the exact on-disk bytes, including for names that a
    # strict str.encode() would reject with UnicodeEncodeError.
    entries = [os.fsencode(entry) for entry in entries_str]

    # Sort entries ourselves; InfoZIP's zip does not guarantee order
    entries.sort()

    # -X = --no-extra, which prevents inclusion of extra non-deterministic
    # and implementation-dependent data
    # -o = --latest-time, which sets the modification time of the zip to that
    # of the most recent file
    subprocess.run(
        ["zip", "-X", "-o", target, "--names-stdin"],
        cwd=checkout_dir,
        input=b"\n".join(entries),
        check=True,
    )