# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Creates a ZIP file deterministically.

This can be used as a base reference for delta files."""

import os
import subprocess
from typing import Iterable, List

# Permission bits forced onto every directory / regular file of a checkout.
# Only permission bits are given: chmod() cannot change the file type, so
# including S_IFREG/S_IFLNK bits in the mode would be meaningless.
_DIR_MODE = 0o755
_FILE_MODE = 0o644


def normalize_timestamps(entries: Iterable[str]) -> None:
    """Set the access and modification times of every path to the epoch.

    :param entries: paths (usable as-is from the current working directory)
        whose timestamps should be reset.
    """
    for entry in entries:
        os.utime(entry, (0, 0))


def normalize_perms(checkout_dir: str) -> None:
    """Recursively force fixed permissions on the content of ``checkout_dir``.

    Info-ZIP has an extension that allows storing permissions; but not all
    implementations support it (eg. not Python). So let's normalize by
    giving every directory the same mode, and every regular file the same
    mode.

    Symbolic links are left untouched: ``os.chmod`` follows them, so calling
    it on a link would change the permissions of the link's *target* (which
    may live outside of the checkout) and would fail on dangling links.

    ``checkout_dir`` itself is not modified, only what it contains.

    :param checkout_dir: directory whose content is normalized in place.
    :raises AssertionError: if an entry is neither a directory, a regular
        file, nor a symbolic link.
    """
    with os.scandir(checkout_dir) as dir_entries:
        for dir_entry in dir_entries:
            if dir_entry.is_dir(follow_symlinks=False):
                # chmod *before* recursing, so that the directory is
                # guaranteed to be listable and traversable.
                os.chmod(dir_entry.path, _DIR_MODE)
                normalize_perms(dir_entry.path)
            elif dir_entry.is_file(follow_symlinks=False):
                os.chmod(dir_entry.path, _FILE_MODE)
            elif dir_entry.is_symlink():
                # Nothing to normalize; see the docstring.
                continue
            else:
                # Explicit raise rather than `assert False`, which would be
                # stripped when running with `python -O`.
                raise AssertionError(f"Unknown DirEntry type: {dir_entry}")


def walk(checkout_dir: str) -> List[str]:
    """List the directories and files found under ``checkout_dir``.

    :param checkout_dir: root directory to walk.
    :returns: paths relative to ``checkout_dir``, in ``os.walk`` order (ie.
        not sorted). The root directory itself is listed as the empty
        string.
    """
    entries = []
    for (dirpath, dirnames, filenames) in os.walk(checkout_dir):
        assert dirpath.startswith(checkout_dir)
        # Make the path relative to the checkout's root.
        dirpath = dirpath[len(checkout_dir) :].lstrip("/")
        entries.append(dirpath)
        entries.extend(os.path.join(dirpath, filename) for filename in filenames)
    return entries


def compress(checkout_dir: str, target: str) -> None:
    """Generates a reference zipball for the given checked out directory.

    Permissions and timestamps of the content of ``checkout_dir`` are
    normalized in place before the archive is built.

    :param checkout_dir: existing directory to archive.
    :param target: path of the ZIP file to write; any existing file at that
        path is removed first.
    :raises AssertionError: if ``checkout_dir`` is not a directory.
    :raises subprocess.CalledProcessError: if ``zip`` exits with a non-zero
        status.
    """
    # zip is run with cwd=checkout_dir; resolve the target once, so that the
    # file removed below and the file written by zip are always the same one,
    # even when a relative path is given.
    target = os.path.abspath(target)

    try:
        os.remove(target)
    except FileNotFoundError:
        pass

    assert os.path.isdir(checkout_dir), checkout_dir

    entries_str = walk(checkout_dir)

    normalize_perms(checkout_dir)
    normalize_timestamps(os.path.join(checkout_dir, entry) for entry in entries_str)

    # Encode *before* sorting; sorting on unicode changes across configurations.
    entries = [entry.encode() for entry in entries_str]

    # Sort entries ourselves; InfoZIP's zip does not guarantee order
    entries.sort()

    # -X = --no-extra, which prevents inclusion of extra non-deterministic
    #      and implementation-dependent data
    # -o = --latest-time, which sets the modification time of the zip to that
    #      of the most recent file
    subprocess.run(
        ["zip", "-X", "-o", target, "--names-stdin"],
        cwd=checkout_dir,
        input=b"\n".join(entries),
        check=True,
    )