diff --git a/dulwich/object_store.py b/dulwich/object_store.py index 3656ceef..551f9c1f 100644 --- a/dulwich/object_store.py +++ b/dulwich/object_store.py @@ -1,1601 +1,1604 @@ # object_store.py -- Object store for git objects # Copyright (C) 2008-2013 Jelmer Vernooij # and others # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache # License, Version 2.0. # """Git object store interfaces and implementation.""" from io import BytesIO import os import stat import sys from dulwich.diff_tree import ( tree_changes, walk_trees, ) from dulwich.errors import ( NotTreeError, ) from dulwich.file import GitFile from dulwich.objects import ( Commit, ShaFile, Tag, Tree, ZERO_SHA, hex_to_sha, sha_to_hex, hex_to_filename, S_ISGITLINK, object_class, valid_hexsha, ) from dulwich.pack import ( Pack, PackData, PackInflater, PackFileDisappeared, load_pack_index_file, iter_sha1, pack_objects_to_data, write_pack_header, write_pack_index_v2, write_pack_data, write_pack_object, compute_file_sha, PackIndexer, PackStreamCopier, ) from dulwich.protocol import DEPTH_INFINITE from dulwich.refs import ANNOTATED_TAG_SUFFIX INFODIR = "info" PACKDIR = "pack" -PACK_MODE = 0o444 +# use permissions consistent with Git; just readable by everyone +# TODO: should packs also be non-writable on Windows? if so, that +# would require some rather significant adjustments to the test suite +PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 class BaseObjectStore(object): """Object store interface.""" def determine_wants_all(self, refs, depth=None): def _want_deepen(sha): if not depth: return False if depth == DEPTH_INFINITE: return True return depth > self._get_depth(sha) return [ sha for (ref, sha) in refs.items() if (sha not in self or _want_deepen(sha)) and not ref.endswith(ANNOTATED_TAG_SUFFIX) and not sha == ZERO_SHA ] def iter_shas(self, shas): """Iterate over the objects for the specified shas. Args: shas: Iterable object with SHAs Returns: Object iterator """ return ObjectStoreIterator(self, shas) def contains_loose(self, sha): """Check if a particular object is present by SHA1 and is loose.""" raise NotImplementedError(self.contains_loose) def contains_packed(self, sha): """Check if a particular object is present by SHA1 and is packed.""" raise NotImplementedError(self.contains_packed) def __contains__(self, sha): """Check if a particular object is present by SHA1. This method makes no distinction between loose and packed objects. """ return self.contains_packed(sha) or self.contains_loose(sha) @property def packs(self): """Iterable of pack objects.""" raise NotImplementedError def get_raw(self, name): """Obtain the raw text for an object. Args: name: sha for the object. Returns: tuple with numeric type and object contents.
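Review note on the PACK_MODE change above: a minimal sketch (the scratch directory and object contents are illustrative) of the platform-dependent mode that the test-suite change below asserts. On Windows the write bit stays set because os.chmod can only toggle the read-only attribute, and stat reports writable files as 0o666.

```python
# Sketch of the PACK_MODE behaviour; requires this patched dulwich.
import os
import stat
import sys
import tempfile

from dulwich.object_store import DiskObjectStore
from dulwich.objects import Blob

store = DiskObjectStore.init(tempfile.mkdtemp())  # hypothetical scratch dir
blob = Blob.from_string(b"example data")
store.add_object(blob)
mode = os.stat(store._get_shafile_path(blob.id)).st_mode
# 0o444 (read-only) on POSIX; Windows reports writable files as 0o666.
expected = 0o444 if sys.platform != "win32" else 0o666
assert stat.S_IMODE(mode) == expected
```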
""" raise NotImplementedError(self.get_raw) def __getitem__(self, sha): """Obtain an object by SHA1.""" type_num, uncomp = self.get_raw(sha) return ShaFile.from_raw_string(type_num, uncomp, sha=sha) def __iter__(self): """Iterate over the SHAs that are present in this store.""" raise NotImplementedError(self.__iter__) def add_object(self, obj): """Add a single object to this object store.""" raise NotImplementedError(self.add_object) def add_objects(self, objects, progress=None): """Add a set of objects to this object store. Args: objects: Iterable over a list of (object, path) tuples """ raise NotImplementedError(self.add_objects) def add_pack_data(self, count, pack_data, progress=None): """Add pack data to this object store. Args: num_items: Number of items to add pack_data: Iterator over pack data tuples """ if count == 0: # Don't bother writing an empty pack file return f, commit, abort = self.add_pack() try: write_pack_data( f, count, pack_data, progress, compression_level=self.pack_compression_level, ) except BaseException: abort() raise else: return commit() def tree_changes( self, source, target, want_unchanged=False, include_trees=False, change_type_same=False, rename_detector=None, ): """Find the differences between the contents of two trees Args: source: SHA1 of the source tree target: SHA1 of the target tree want_unchanged: Whether unchanged files should be reported include_trees: Whether to include trees change_type_same: Whether to report files changing type in the same entry. Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) """ for change in tree_changes( self, source, target, want_unchanged=want_unchanged, include_trees=include_trees, change_type_same=change_type_same, rename_detector=rename_detector, ): yield ( (change.old.path, change.new.path), (change.old.mode, change.new.mode), (change.old.sha, change.new.sha), ) def iter_tree_contents(self, tree_id, include_trees=False): """Iterate the contents of a tree and all subtrees. Iteration is depth-first pre-order, as in e.g. os.walk. Args: tree_id: SHA1 of the tree. include_trees: If True, include tree objects in the iteration. Returns: Iterator over TreeEntry namedtuples for all the objects in a tree. """ for entry, _ in walk_trees(self, tree_id, None): if ( entry.mode is not None and not stat.S_ISDIR(entry.mode) ) or include_trees: yield entry def find_missing_objects( self, haves, wants, shallow=None, progress=None, get_tagged=None, get_parents=lambda commit: commit.parents, depth=None, ): """Find the missing objects required for a set of revisions. Args: haves: Iterable over SHAs already in common. wants: Iterable over SHAs of objects to fetch. shallow: Set of shallow commit SHA1s to skip progress: Simple progress function that will be called with updated progress strings. get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags. get_parents: Optional function for getting the parents of a commit. Returns: Iterator over (sha, path) pairs. """ finder = MissingObjectFinder( self, haves, wants, shallow, progress, get_tagged, get_parents=get_parents, ) return iter(finder.next, None) def find_common_revisions(self, graphwalker): """Find which revisions this store has in common using graphwalker. Args: graphwalker: A graphwalker object. 
Returns: List of SHAs that are in common """ haves = [] sha = next(graphwalker) while sha: if sha in self: haves.append(sha) graphwalker.ack(sha) sha = next(graphwalker) return haves def generate_pack_contents(self, have, want, shallow=None, progress=None): """Iterate over the contents of a pack file. Args: have: List of SHA1s of objects that should not be sent want: List of SHA1s of objects that should be sent shallow: Set of shallow commit SHA1s to skip progress: Optional progress reporting method """ missing = self.find_missing_objects(have, want, shallow, progress) return self.iter_shas(missing) def generate_pack_data( self, have, want, shallow=None, progress=None, ofs_delta=True ): """Generate pack data objects for a set of wants/haves. Args: have: List of SHA1s of objects that should not be sent want: List of SHA1s of objects that should be sent shallow: Set of shallow commit SHA1s to skip ofs_delta: Whether OFS deltas can be included progress: Optional progress reporting method """ # TODO(jelmer): More efficient implementation return pack_objects_to_data( self.generate_pack_contents(have, want, shallow, progress) ) def peel_sha(self, sha): """Peel all tags from a SHA. Args: sha: The object SHA to peel. Returns: The fully-peeled SHA1 of a tag object, after peeling all intermediate tags; if the original ref does not point to a tag, this will equal the original SHA1. """ obj = self[sha] obj_class = object_class(obj.type_name) while obj_class is Tag: obj_class, sha = obj.object obj = self[sha] return obj def _collect_ancestors( self, heads, common=set(), shallow=set(), get_parents=lambda commit: commit.parents, ): """Collect all ancestors of heads up to (excluding) those in common. Args: heads: commits to start from common: commits to end at, or empty set to walk repository completely get_parents: Optional function for getting the parents of a commit. Returns: a tuple (A, B) where A - all commits reachable from heads but not present in common, B - common (shared) elements that are directly reachable from heads """ bases = set() commits = set() queue = [] queue.extend(heads) while queue: e = queue.pop(0) if e in common: bases.add(e) elif e not in commits: commits.add(e) if e in shallow: continue cmt = self[e] queue.extend(get_parents(cmt)) return (commits, bases) def _get_depth( self, head, get_parents=lambda commit: commit.parents, max_depth=None, ): """Return the current available depth for the given head. For commits with multiple parents, the largest possible depth will be returned. Args: head: commit to start from get_parents: optional function for getting the parents of a commit max_depth: maximum depth to search """ if head not in self: return 0 current_depth = 1 queue = [(head, current_depth)] while queue and (max_depth is None or current_depth < max_depth): e, depth = queue.pop(0) current_depth = max(current_depth, depth) cmt = self[e] if isinstance(cmt, Tag): _cls, sha = cmt.object cmt = self[sha] queue.extend( (parent, depth + 1) for parent in get_parents(cmt) if parent in self ) return current_depth def close(self): """Close any files opened by this object store.""" # Default implementation is a NO-OP class PackBasedObjectStore(BaseObjectStore): def __init__(self, pack_compression_level=-1): self._pack_cache = {} self.pack_compression_level = pack_compression_level @property def alternates(self): return [] def contains_packed(self, sha): """Check if a particular object is present by SHA1 and is packed. This does not check alternates. 
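A small sketch of peel_sha with an annotated tag, reusing the helpers from dulwich.tests.utils as the test suite below does:

```python
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob
from dulwich.tests.utils import make_object, make_tag

store = MemoryObjectStore()
blob = make_object(Blob, data=b"payload")
store.add_object(blob)
tag = make_tag(blob, name=b"v1")  # annotated tag pointing at the blob
store.add_object(tag)
# peel_sha follows the tag chain to the underlying object:
assert store.peel_sha(tag.id) == blob
assert store.peel_sha(blob.id) == blob  # non-tags peel to themselves
```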
""" for pack in self.packs: try: if sha in pack: return True except PackFileDisappeared: pass return False def __contains__(self, sha): """Check if a particular object is present by SHA1. This method makes no distinction between loose and packed objects. """ if self.contains_packed(sha) or self.contains_loose(sha): return True for alternate in self.alternates: if sha in alternate: return True return False def _add_cached_pack(self, base_name, pack): """Add a newly appeared pack to the cache by path.""" prev_pack = self._pack_cache.get(base_name) if prev_pack is not pack: self._pack_cache[base_name] = pack if prev_pack: prev_pack.close() def _clear_cached_packs(self): pack_cache = self._pack_cache self._pack_cache = {} while pack_cache: (name, pack) = pack_cache.popitem() pack.close() def _iter_cached_packs(self): return self._pack_cache.values() def _update_pack_cache(self): raise NotImplementedError(self._update_pack_cache) def close(self): self._clear_cached_packs() @property def packs(self): """List with pack objects.""" return list(self._iter_cached_packs()) + list(self._update_pack_cache()) def _iter_alternate_objects(self): """Iterate over the SHAs of all the objects in alternate stores.""" for alternate in self.alternates: for alternate_object in alternate: yield alternate_object def _iter_loose_objects(self): """Iterate over the SHAs of all loose objects.""" raise NotImplementedError(self._iter_loose_objects) def _get_loose_object(self, sha): raise NotImplementedError(self._get_loose_object) def _remove_loose_object(self, sha): raise NotImplementedError(self._remove_loose_object) def _remove_pack(self, name): raise NotImplementedError(self._remove_pack) def pack_loose_objects(self): """Pack loose objects. Returns: Number of objects packed """ objects = set() for sha in self._iter_loose_objects(): objects.add((self._get_loose_object(sha), None)) self.add_objects(list(objects)) for obj, path in objects: self._remove_loose_object(obj.id) return len(objects) def repack(self): """Repack the packs in this repository. Note that this implementation is fairly naive and currently keeps all objects in memory while it repacks. """ loose_objects = set() for sha in self._iter_loose_objects(): loose_objects.add(self._get_loose_object(sha)) objects = {(obj, None) for obj in loose_objects} old_packs = {p.name(): p for p in self.packs} for name, pack in old_packs.items(): objects.update((obj, None) for obj in pack.iterobjects()) # The name of the consolidated pack might match the name of a # pre-existing pack. Take care not to remove the newly created # consolidated pack. consolidated = self.add_objects(objects) old_packs.pop(consolidated.name(), None) for obj in loose_objects: self._remove_loose_object(obj.id) for name, pack in old_packs.items(): self._remove_pack(pack) self._update_pack_cache() return len(objects) def __iter__(self): """Iterate over the SHAs that are present in this store.""" self._update_pack_cache() for pack in self._iter_cached_packs(): try: for sha in pack: yield sha except PackFileDisappeared: pass for sha in self._iter_loose_objects(): yield sha for sha in self._iter_alternate_objects(): yield sha def contains_loose(self, sha): """Check if a particular object is present by SHA1 and is loose. This does not check alternates. """ return self._get_loose_object(sha) is not None def get_raw(self, name): """Obtain the raw fulltext for an object. Args: name: sha for the object. Returns: tuple with numeric type and object contents. 
""" if name == ZERO_SHA: raise KeyError(name) if len(name) == 40: sha = hex_to_sha(name) hexsha = name elif len(name) == 20: sha = name hexsha = None else: raise AssertionError("Invalid object name %r" % (name,)) for pack in self._iter_cached_packs(): try: return pack.get_raw(sha) except (KeyError, PackFileDisappeared): pass if hexsha is None: hexsha = sha_to_hex(name) ret = self._get_loose_object(hexsha) if ret is not None: return ret.type_num, ret.as_raw_string() # Maybe something else has added a pack with the object # in the mean time? for pack in self._update_pack_cache(): try: return pack.get_raw(sha) except KeyError: pass for alternate in self.alternates: try: return alternate.get_raw(hexsha) except KeyError: pass raise KeyError(hexsha) def add_objects(self, objects, progress=None): """Add a set of objects to this object store. Args: objects: Iterable over (object, path) tuples, should support __len__. Returns: Pack object of the objects written. """ return self.add_pack_data(*pack_objects_to_data(objects), progress=progress) class DiskObjectStore(PackBasedObjectStore): """Git-style object store that exists on disk.""" def __init__(self, path, loose_compression_level=-1, pack_compression_level=-1): """Open an object store. Args: path: Path of the object store. loose_compression_level: zlib compression level for loose objects pack_compression_level: zlib compression level for pack objects """ super(DiskObjectStore, self).__init__( pack_compression_level=pack_compression_level ) self.path = path self.pack_dir = os.path.join(self.path, PACKDIR) self._alternates = None self.loose_compression_level = loose_compression_level self.pack_compression_level = pack_compression_level def __repr__(self): return "<%s(%r)>" % (self.__class__.__name__, self.path) @classmethod def from_config(cls, path, config): try: default_compression_level = int( config.get((b"core",), b"compression").decode() ) except KeyError: default_compression_level = -1 try: loose_compression_level = int( config.get((b"core",), b"looseCompression").decode() ) except KeyError: loose_compression_level = default_compression_level try: pack_compression_level = int( config.get((b"core",), "packCompression").decode() ) except KeyError: pack_compression_level = default_compression_level return cls(path, loose_compression_level, pack_compression_level) @property def alternates(self): if self._alternates is not None: return self._alternates self._alternates = [] for path in self._read_alternate_paths(): self._alternates.append(DiskObjectStore(path)) return self._alternates def _read_alternate_paths(self): try: f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") except FileNotFoundError: return with f: for line in f.readlines(): line = line.rstrip(b"\n") if line.startswith(b"#"): continue if os.path.isabs(line): yield os.fsdecode(line) else: yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) def add_alternate_path(self, path): """Add an alternate path to this object store.""" try: os.mkdir(os.path.join(self.path, INFODIR)) except FileExistsError: pass alternates_path = os.path.join(self.path, INFODIR, "alternates") with GitFile(alternates_path, "wb") as f: try: orig_f = open(alternates_path, "rb") except FileNotFoundError: pass else: with orig_f: f.write(orig_f.read()) f.write(os.fsencode(path) + b"\n") if not os.path.isabs(path): path = os.path.join(self.path, path) self.alternates.append(DiskObjectStore(path)) def _update_pack_cache(self): """Read and iterate over new pack files and cache them.""" try: 
pack_dir_contents = os.listdir(self.pack_dir) except FileNotFoundError: self.close() return [] pack_files = set() for name in pack_dir_contents: if name.startswith("pack-") and name.endswith(".pack"): # verify that idx exists first (otherwise the pack was not yet # fully written) idx_name = os.path.splitext(name)[0] + ".idx" if idx_name in pack_dir_contents: pack_name = name[: -len(".pack")] pack_files.add(pack_name) # Open newly appeared pack files new_packs = [] for f in pack_files: if f not in self._pack_cache: pack = Pack(os.path.join(self.pack_dir, f)) new_packs.append(pack) self._pack_cache[f] = pack # Remove disappeared pack files for f in set(self._pack_cache) - pack_files: self._pack_cache.pop(f).close() return new_packs def _get_shafile_path(self, sha): # Check from object dir return hex_to_filename(self.path, sha) def _iter_loose_objects(self): for base in os.listdir(self.path): if len(base) != 2: continue for rest in os.listdir(os.path.join(self.path, base)): sha = os.fsencode(base + rest) if not valid_hexsha(sha): continue yield sha def _get_loose_object(self, sha): path = self._get_shafile_path(sha) try: return ShaFile.from_path(path) except FileNotFoundError: return None def _remove_loose_object(self, sha): os.remove(self._get_shafile_path(sha)) def _remove_pack(self, pack): try: del self._pack_cache[os.path.basename(pack._basename)] except KeyError: pass pack.close() os.remove(pack.data.path) os.remove(pack.index.path) def _get_pack_basepath(self, entries): suffix = iter_sha1(entry[0] for entry in entries) # TODO: Handle self.pack_dir being bytes suffix = suffix.decode("ascii") return os.path.join(self.pack_dir, "pack-" + suffix) def _complete_thin_pack(self, f, path, copier, indexer): """Move a specific file containing a pack into the pack directory. Note: The file should be on the same file system as the packs directory. Args: f: Open file object for the pack. path: Path to the pack file. copier: A PackStreamCopier to use for writing pack data. indexer: A PackIndexer for indexing the pack. """ entries = list(indexer) # Update the header with the new number of objects. f.seek(0) write_pack_header(f, len(entries) + len(indexer.ext_refs())) # Must flush before reading (http://bugs.python.org/issue3207) f.flush() # Rescan the rest of the pack, computing the SHA with the new header. new_sha = compute_file_sha(f, end_ofs=-20) # Must reposition before writing (http://bugs.python.org/issue3207) f.seek(0, os.SEEK_CUR) # Complete the pack. for ext_sha in indexer.ext_refs(): assert len(ext_sha) == 20 type_num, data = self.get_raw(ext_sha) offset = f.tell() crc32 = write_pack_object( f, type_num, data, sha=new_sha, compression_level=self.pack_compression_level, ) entries.append((ext_sha, offset, crc32)) pack_sha = new_sha.digest() f.write(pack_sha) f.close() # Move the pack in. entries.sort() pack_base_name = self._get_pack_basepath(entries) target_pack = pack_base_name + ".pack" if sys.platform == "win32": # Windows might have the target pack file lingering. Attempt # removal, silently passing if the target does not exist. try: os.remove(target_pack) except FileNotFoundError: pass os.rename(path, target_pack) # Write the index. index_file = GitFile(pack_base_name + ".idx", "wb", mask=PACK_MODE) try: write_pack_index_v2(index_file, entries, pack_sha) index_file.close() finally: index_file.abort() # Add the pack to the store and return it. 
final_pack = Pack(pack_base_name) final_pack.check_length_and_checksum() self._add_cached_pack(pack_base_name, final_pack) return final_pack def add_thin_pack(self, read_all, read_some): """Add a new thin pack to this object store. Thin packs are packs that contain deltas with parents that exist outside the pack. They should never be placed in the object store directly, and always indexed and completed as they are copied. Args: read_all: Read function that blocks until the number of requested bytes are read. read_some: Read function that returns at least one byte, but may not return the number of bytes requested. Returns: A Pack object pointing at the now-completed thin pack in the objects/pack directory. """ import tempfile fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") with os.fdopen(fd, "w+b") as f: os.chmod(path, PACK_MODE) indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) copier.verify() return self._complete_thin_pack(f, path, copier, indexer) def move_in_pack(self, path): """Move a specific file containing a pack into the pack directory. Note: The file should be on the same file system as the packs directory. Args: path: Path to the pack file. """ with PackData(path) as p: entries = p.sorted_entries() basename = self._get_pack_basepath(entries) index_name = basename + ".idx" if not os.path.exists(index_name): with GitFile(index_name, "wb", mask=PACK_MODE) as f: write_pack_index_v2(f, entries, p.get_stored_checksum()) for pack in self.packs: if pack._basename == basename: return pack target_pack = basename + ".pack" if sys.platform == "win32": # Windows might have the target pack file lingering. Attempt # removal, silently passing if the target does not exist. try: os.remove(target_pack) except FileNotFoundError: pass os.rename(path, target_pack) final_pack = Pack(basename) self._add_cached_pack(basename, final_pack) return final_pack def add_pack(self): """Add a new pack to this object store. Returns: Fileobject to write to, a commit function to call when the pack is finished and an abort function. """ import tempfile fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") f = os.fdopen(fd, "wb") os.chmod(path, PACK_MODE) def commit(): f.flush() os.fsync(fd) f.close() if os.path.getsize(path) > 0: return self.move_in_pack(path) else: os.remove(path) return None def abort(): f.close() os.remove(path) return f, commit, abort def add_object(self, obj): """Add a single object to this object store. 
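The add_pack() commit/abort contract implemented above is used like this (a sketch mirroring test_add_pack in the test module below; the scratch directory is illustrative):

```python
import tempfile

from dulwich.object_store import DiskObjectStore
from dulwich.objects import Blob
from dulwich.pack import write_pack_objects

store = DiskObjectStore.init(tempfile.mkdtemp())
f, commit, abort = store.add_pack()
try:
    write_pack_objects(f, [(Blob.from_string(b"packed"), None)])
except BaseException:
    abort()  # discards the temporary .pack file
    raise
else:
    pack = commit()  # writes the index and moves the pack into place
```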
Args: obj: Object to add """ path = self._get_shafile_path(obj.id) dir = os.path.dirname(path) try: os.mkdir(dir) except FileExistsError: pass if os.path.exists(path): return # Already there, no need to write again with GitFile(path, "wb", mask=PACK_MODE) as f: f.write( obj.as_legacy_object(compression_level=self.loose_compression_level) ) @classmethod def init(cls, path): try: os.mkdir(path) except FileExistsError: pass os.mkdir(os.path.join(path, "info")) os.mkdir(os.path.join(path, PACKDIR)) return cls(path) class MemoryObjectStore(BaseObjectStore): """Object store that keeps all objects in memory.""" def __init__(self): super(MemoryObjectStore, self).__init__() self._data = {} self.pack_compression_level = -1 def _to_hexsha(self, sha): if len(sha) == 40: return sha elif len(sha) == 20: return sha_to_hex(sha) else: raise ValueError("Invalid sha %r" % (sha,)) def contains_loose(self, sha): """Check if a particular object is present by SHA1 and is loose.""" return self._to_hexsha(sha) in self._data def contains_packed(self, sha): """Check if a particular object is present by SHA1 and is packed.""" return False def __iter__(self): """Iterate over the SHAs that are present in this store.""" return iter(self._data.keys()) @property def packs(self): """List with pack objects.""" return [] def get_raw(self, name): """Obtain the raw text for an object. Args: name: sha for the object. Returns: tuple with numeric type and object contents. """ obj = self[self._to_hexsha(name)] return obj.type_num, obj.as_raw_string() def __getitem__(self, name): return self._data[self._to_hexsha(name)].copy() def __delitem__(self, name): """Delete an object from this store, for testing only.""" del self._data[self._to_hexsha(name)] def add_object(self, obj): """Add a single object to this object store.""" self._data[obj.id] = obj.copy() def add_objects(self, objects, progress=None): """Add a set of objects to this object store. Args: objects: Iterable over a list of (object, path) tuples """ for obj, path in objects: self.add_object(obj) def add_pack(self): """Add a new pack to this object store. Because this object store doesn't support packs, we extract and add the individual objects. Returns: Fileobject to write to and a commit function to call when the pack is finished. """ f = BytesIO() def commit(): p = PackData.from_file(BytesIO(f.getvalue()), f.tell()) f.close() for obj in PackInflater.for_pack_data(p, self.get_raw): self.add_object(obj) def abort(): pass return f, commit, abort def _complete_thin_pack(self, f, indexer): """Complete a thin pack by adding external references. Args: f: Open file object for the pack. indexer: A PackIndexer for indexing the pack. """ entries = list(indexer) # Update the header with the new number of objects. f.seek(0) write_pack_header(f, len(entries) + len(indexer.ext_refs())) # Rescan the rest of the pack, computing the SHA with the new header. new_sha = compute_file_sha(f, end_ofs=-20) # Complete the pack. for ext_sha in indexer.ext_refs(): assert len(ext_sha) == 20 type_num, data = self.get_raw(ext_sha) write_pack_object(f, type_num, data, sha=new_sha) pack_sha = new_sha.digest() f.write(pack_sha) def add_thin_pack(self, read_all, read_some): """Add a new thin pack to this object store. Thin packs are packs that contain deltas with parents that exist outside the pack. Because this object store doesn't support packs, we extract and add the individual objects. Args: read_all: Read function that blocks until the number of requested bytes are read. 
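Because MemoryObjectStore stores copies (see add_object above), mutating an object after adding it cannot corrupt the store; a sketch of the semantics that test_store_resilience below checks:

```python
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob.from_string(b"data")
store.add_object(blob)
oid = blob.id
blob.data = b"data-mutated"  # the live object's id changes...
assert store[oid].id == oid  # ...but the stored copy is untouched
assert store.get_raw(oid) == (Blob.type_num, b"data")
```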
read_some: Read function that returns at least one byte, but may not return the number of bytes requested. """ f, commit, abort = self.add_pack() try: indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) copier.verify() self._complete_thin_pack(f, indexer) except BaseException: abort() raise else: commit() class ObjectIterator(object): """Interface for iterating over objects.""" def iterobjects(self): raise NotImplementedError(self.iterobjects) class ObjectStoreIterator(ObjectIterator): """ObjectIterator that works on top of an ObjectStore.""" def __init__(self, store, sha_iter): """Create a new ObjectIterator. Args: store: Object store to retrieve from sha_iter: Iterator over (sha, path) tuples """ self.store = store self.sha_iter = sha_iter self._shas = [] def __iter__(self): """Yield tuple with next object and path.""" for sha, path in self.itershas(): yield self.store[sha], path def iterobjects(self): """Iterate over just the objects.""" for o, path in self: yield o def itershas(self): """Iterate over the SHAs.""" for sha in self._shas: yield sha for sha in self.sha_iter: self._shas.append(sha) yield sha def __contains__(self, needle): """Check if an object is present. Note: This checks if the object is present in the underlying object store, not if it would be yielded by the iterator. Args: needle: SHA1 of the object to check for """ if needle == ZERO_SHA: return False return needle in self.store def __getitem__(self, key): """Find an object by SHA1. Note: This retrieves the object from the underlying object store. It will also succeed if the object would not be returned by the iterator. """ return self.store[key] def __len__(self): """Return the number of objects.""" return len(list(self.itershas())) def empty(self): import warnings warnings.warn("Use bool() instead.", DeprecationWarning) return self._empty() def _empty(self): it = self.itershas() try: next(it) except StopIteration: return True else: return False def __bool__(self): """Indicate whether this object has contents.""" return not self._empty() def tree_lookup_path(lookup_obj, root_sha, path): """Look up an object in a Git tree. Args: lookup_obj: Callback for retrieving object by SHA1 root_sha: SHA1 of the root tree path: Path to lookup Returns: A tuple of (mode, SHA) of the resulting path. """ tree = lookup_obj(root_sha) if not isinstance(tree, Tree): raise NotTreeError(root_sha) return tree.lookup_path(lookup_obj, path) def _collect_filetree_revs(obj_store, tree_sha, kset): """Collect SHA1s of files and directories for specified tree. Args: obj_store: Object store to get objects by SHA from tree_sha: tree reference to walk kset: set to fill with references to files and directories """ filetree = obj_store[tree_sha] for name, mode, sha in filetree.iteritems(): if not S_ISGITLINK(mode) and sha not in kset: kset.add(sha) if stat.S_ISDIR(mode): _collect_filetree_revs(obj_store, sha, kset) def _split_commits_and_tags(obj_store, lst, ignore_unknown=False): """Split object id list into three lists with commit, tag, and other SHAs. Commits referenced by tags are included into commits list as well. Only SHA1s known in this repository will get through, and unless ignore_unknown argument is True, KeyError is thrown for SHA1 missing in the repository Args: obj_store: Object store to get objects by SHA1 from lst: Collection of commit and tag SHAs ignore_unknown: True to skip SHA1 missing in the repository silently. 
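A usage sketch of tree_lookup_path defined above (path and contents are illustrative):

```python
from dulwich.index import commit_tree
from dulwich.object_store import MemoryObjectStore, tree_lookup_path
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob.from_string(b"content")
store.add_object(blob)
tree_id = commit_tree(store, [(b"dir/file", blob.id, 0o100644)])
mode, sha = tree_lookup_path(store.__getitem__, tree_id, b"dir/file")
assert (mode, sha) == (0o100644, blob.id)
```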
Returns: A tuple of (commits, tags, others) SHA1s """ commits = set() tags = set() others = set() for e in lst: try: o = obj_store[e] except KeyError: if not ignore_unknown: raise else: if isinstance(o, Commit): commits.add(e) elif isinstance(o, Tag): tags.add(e) tagged = o.object[1] c, t, o = _split_commits_and_tags( obj_store, [tagged], ignore_unknown=ignore_unknown ) commits |= c tags |= t others |= o else: others.add(e) return (commits, tags, others) class MissingObjectFinder(object): """Find the objects missing from another object store. Args: object_store: Object store containing at least all objects to be sent haves: SHA1s of commits not to send (already present in target) wants: SHA1s of commits to send progress: Optional function to report progress to. get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags. get_parents: Optional function for getting the parents of a commit. tagged: dict of pointed-to sha -> tag sha for including tags """ def __init__( self, object_store, haves, wants, shallow=None, progress=None, get_tagged=None, get_parents=lambda commit: commit.parents, ): self.object_store = object_store if shallow is None: shallow = set() self._get_parents = get_parents # process Commits and Tags differently # Note, while haves may list commits/tags not available locally, # and such SHAs would get filtered out by _split_commits_and_tags, # wants shall list only known SHAs, and otherwise # _split_commits_and_tags fails with KeyError have_commits, have_tags, have_others = _split_commits_and_tags( object_store, haves, True ) want_commits, want_tags, want_others = _split_commits_and_tags( object_store, wants, False ) # all_ancestors is a set of commits that shall not be sent # (complete repository up to 'haves') all_ancestors = object_store._collect_ancestors( have_commits, shallow=shallow, get_parents=self._get_parents )[0] # all_missing - complete set of commits between haves and wants # common - commits from all_ancestors we hit into while # traversing parent hierarchy of wants missing_commits, common_commits = object_store._collect_ancestors( want_commits, all_ancestors, shallow=shallow, get_parents=self._get_parents, ) self.sha_done = set() # Now, fill sha_done with commits and revisions of # files and directories known to be both locally # and on target. 
Thus these commits and files # won't get selected for fetch for h in common_commits: self.sha_done.add(h) cmt = object_store[h] _collect_filetree_revs(object_store, cmt.tree, self.sha_done) # record tags we have as visited, too for t in have_tags: self.sha_done.add(t) missing_tags = want_tags.difference(have_tags) missing_others = want_others.difference(have_others) # in fact, what we 'want' is commits, tags, and others # we've found missing wants = missing_commits.union(missing_tags) wants = wants.union(missing_others) self.objects_to_send = set([(w, None, False) for w in wants]) if progress is None: self.progress = lambda x: None else: self.progress = progress self._tagged = get_tagged and get_tagged() or {} def add_todo(self, entries): self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done]) def next(self): while True: if not self.objects_to_send: return None (sha, name, leaf) = self.objects_to_send.pop() if sha not in self.sha_done: break if not leaf: o = self.object_store[sha] if isinstance(o, Commit): self.add_todo([(o.tree, "", False)]) elif isinstance(o, Tree): self.add_todo( [ (s, n, not stat.S_ISDIR(m)) for n, m, s in o.iteritems() if not S_ISGITLINK(m) ] ) elif isinstance(o, Tag): self.add_todo([(o.object[1], None, False)]) if sha in self._tagged: self.add_todo([(self._tagged[sha], None, True)]) self.sha_done.add(sha) self.progress(("counting objects: %d\r" % len(self.sha_done)).encode("ascii")) return (sha, name) __next__ = next class ObjectStoreGraphWalker(object): """Graph walker that finds what commits are missing from an object store. :ivar heads: Revisions without descendants in the local repo :ivar get_parents: Function to retrieve parents in the local repo """ def __init__(self, local_heads, get_parents, shallow=None): """Create a new instance. Args: local_heads: Heads to start search with get_parents: Function for finding the parents of a SHA1. """ self.heads = set(local_heads) self.get_parents = get_parents self.parents = {} if shallow is None: shallow = set() self.shallow = shallow def ack(self, sha): """Ack that a revision and its ancestors are present in the source.""" if len(sha) != 40: raise ValueError("unexpected sha %r received" % sha) ancestors = set([sha]) # stop if we run out of heads to remove while self.heads: for a in ancestors: if a in self.heads: self.heads.remove(a) # collect all ancestors new_ancestors = set() for a in ancestors: ps = self.parents.get(a) if ps is not None: new_ancestors.update(ps) self.parents[a] = None # no more ancestors; stop if not new_ancestors: break ancestors = new_ancestors def next(self): """Iterate over ancestors of heads in the target.""" if self.heads: ret = self.heads.pop() try: ps = self.get_parents(ret) except KeyError: return None self.parents[ret] = ps self.heads.update([p for p in ps if p not in self.parents]) return ret return None __next__ = next def commit_tree_changes(object_store, tree, changes): """Commit a specified set of changes to a tree structure. This will apply a set of changes on top of an existing tree, storing new objects in object_store. changes are a list of tuples with (path, mode, object_sha). Paths can be both blobs and trees. Setting the mode and object sha to None deletes the path. This method works especially well if there are only a small number of changes to a big tree. For a large number of changes to a large tree, use e.g. commit_tree. Args: object_store: Object store to store new objects in and retrieve old ones from.
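The negotiation loop of ObjectStoreGraphWalker above, on a toy two-commit history (40-character padded ids, as in the tests below):

```python
from dulwich.object_store import ObjectStoreGraphWalker

parents = {b"a" * 40: [b"b" * 40], b"b" * 40: []}  # a -> b -> (root)
gw = ObjectStoreGraphWalker([b"a" * 40], parents.__getitem__)
assert next(gw) == b"a" * 40
gw.ack(b"a" * 40)        # the remote has 'a', hence all of its ancestors
assert next(gw) is None  # nothing left to negotiate
```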
tree: Original tree root changes: changes to apply Returns: New tree root object """ # TODO(jelmer): Save up the objects and add them using .add_objects # rather than with individual calls to .add_object. nested_changes = {} for (path, new_mode, new_sha) in changes: try: (dirname, subpath) = path.split(b"/", 1) except ValueError: if new_sha is None: del tree[path] else: tree[path] = (new_mode, new_sha) else: nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) for name, subchanges in nested_changes.items(): try: orig_subtree = object_store[tree[name][1]] except KeyError: orig_subtree = Tree() subtree = commit_tree_changes(object_store, orig_subtree, subchanges) if len(subtree) == 0: del tree[name] else: tree[name] = (stat.S_IFDIR, subtree.id) object_store.add_object(tree) return tree class OverlayObjectStore(BaseObjectStore): """Object store that can overlay multiple object stores.""" def __init__(self, bases, add_store=None): self.bases = bases self.add_store = add_store def add_object(self, object): if self.add_store is None: raise NotImplementedError(self.add_object) return self.add_store.add_object(object) def add_objects(self, objects, progress=None): if self.add_store is None: raise NotImplementedError(self.add_object) return self.add_store.add_objects(objects, progress) @property def packs(self): ret = [] for b in self.bases: ret.extend(b.packs) return ret def __iter__(self): done = set() for b in self.bases: for o_id in b: if o_id not in done: yield o_id done.add(o_id) def get_raw(self, sha_id): for b in self.bases: try: return b.get_raw(sha_id) except KeyError: pass raise KeyError(sha_id) def contains_packed(self, sha): for b in self.bases: if b.contains_packed(sha): return True return False def contains_loose(self, sha): for b in self.bases: if b.contains_loose(sha): return True return False def read_packs_file(f): """Yield the packs listed in a packs file.""" for line in f.read().splitlines(): if not line: continue (kind, name) = line.split(b" ", 1) if kind != b"P": continue yield os.fsdecode(name) class BucketBasedObjectStore(PackBasedObjectStore): """Object store implementation that uses a bucket store like S3 as backend. """ def _iter_loose_objects(self): """Iterate over the SHAs of all loose objects.""" return iter([]) def _get_loose_object(self, sha): return None def _remove_loose_object(self, sha): # Doesn't exist.. pass def _remove_pack(self, name): raise NotImplementedError(self._remove_pack) def _iter_pack_names(self): raise NotImplementedError(self._iter_pack_names) def _get_pack(self, name): raise NotImplementedError(self._get_pack) def _update_pack_cache(self): pack_files = set(self._iter_pack_names()) # Open newly appeared pack files new_packs = [] for f in pack_files: if f not in self._pack_cache: pack = self._get_pack(f) new_packs.append(pack) self._pack_cache[f] = pack # Remove disappeared pack files for f in set(self._pack_cache) - pack_files: self._pack_cache.pop(f).close() return new_packs def _upload_pack(self, basename, pack_file, index_file): raise NotImplementedError def add_pack(self): """Add a new pack to this object store. Returns: Fileobject to write to, a commit function to call when the pack is finished and an abort function. 
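A usage sketch for commit_tree_changes (paths and contents are illustrative): adding a blob at a nested path creates and stores the intermediate trees, as the CommitTreeChangesTests below verify.

```python
from dulwich.index import commit_tree
from dulwich.object_store import MemoryObjectStore, commit_tree_changes
from dulwich.objects import Blob

store = MemoryObjectStore()
a, d = Blob.from_string(b"a"), Blob.from_string(b"d")
for blob in (a, d):
    store.add_object(blob)
tree_id = commit_tree(store, [(b"a", a.id, 0o100644)])
new_tree = commit_tree_changes(
    store, store[tree_id], [(b"sub/d", 0o100644, d.id)]
)
assert set(new_tree) == {b"a", b"sub"}  # a nested subtree was created
```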
""" import tempfile pf = tempfile.SpooledTemporaryFile() def commit(): if pf.tell() == 0: pf.close() return None pf.seek(0) p = PackData(pf.name, pf) entries = p.sorted_entries() basename = iter_sha1(entry[0] for entry in entries).decode('ascii') idxf = tempfile.SpooledTemporaryFile() checksum = p.get_stored_checksum() write_pack_index_v2(idxf, entries, checksum) idxf.seek(0) idx = load_pack_index_file(basename + '.idx', idxf) for pack in self.packs: if pack.get_stored_checksum() == p.get_stored_checksum(): p.close() idx.close() return pack pf.seek(0) idxf.seek(0) self._upload_pack(basename, pf, idxf) final_pack = Pack.from_objects(p, idx) self._add_cached_pack(basename, final_pack) return final_pack return pf, commit, pf.close diff --git a/dulwich/tests/test_object_store.py b/dulwich/tests/test_object_store.py index cae4bb99..68789aac 100644 --- a/dulwich/tests/test_object_store.py +++ b/dulwich/tests/test_object_store.py @@ -1,794 +1,796 @@ # test_object_store.py -- tests for object_store.py # Copyright (C) 2008 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. 
# """Tests for the object store interface.""" from contextlib import closing from io import BytesIO from unittest import skipUnless import os import shutil import stat +import sys import tempfile from dulwich.index import ( commit_tree, ) from dulwich.errors import ( NotTreeError, ) from dulwich.objects import ( sha_to_hex, Blob, Tree, TreeEntry, EmptyFileException, ) from dulwich.object_store import ( DiskObjectStore, MemoryObjectStore, OverlayObjectStore, ObjectStoreGraphWalker, commit_tree_changes, read_packs_file, tree_lookup_path, ) from dulwich.pack import ( REF_DELTA, write_pack_objects, ) from dulwich.protocol import DEPTH_INFINITE from dulwich.tests import ( TestCase, ) from dulwich.tests.utils import ( make_object, make_tag, build_pack, ) try: from unittest.mock import patch except ImportError: patch = None # type: ignore testobject = make_object(Blob, data=b"yummy data") class ObjectStoreTests(object): def test_determine_wants_all(self): self.assertEqual( [b"1" * 40], self.store.determine_wants_all({b"refs/heads/foo": b"1" * 40}), ) def test_determine_wants_all_zero(self): self.assertEqual( [], self.store.determine_wants_all({b"refs/heads/foo": b"0" * 40}) ) @skipUnless(patch, "Required mock.patch") def test_determine_wants_all_depth(self): self.store.add_object(testobject) refs = {b"refs/heads/foo": testobject.id} with patch.object(self.store, "_get_depth", return_value=1) as m: self.assertEqual( [], self.store.determine_wants_all(refs, depth=0) ) self.assertEqual( [testobject.id], self.store.determine_wants_all(refs, depth=DEPTH_INFINITE), ) m.assert_not_called() self.assertEqual( [], self.store.determine_wants_all(refs, depth=1) ) m.assert_called_with(testobject.id) self.assertEqual( [testobject.id], self.store.determine_wants_all(refs, depth=2) ) def test_get_depth(self): self.assertEqual( 0, self.store._get_depth(testobject.id) ) self.store.add_object(testobject) self.assertEqual( 1, self.store._get_depth(testobject.id, get_parents=lambda x: []) ) parent = make_object(Blob, data=b"parent data") self.store.add_object(parent) self.assertEqual( 2, self.store._get_depth( testobject.id, get_parents=lambda x: [parent.id] if x == testobject else [], ), ) def test_iter(self): self.assertEqual([], list(self.store)) def test_get_nonexistant(self): self.assertRaises(KeyError, lambda: self.store[b"a" * 40]) def test_contains_nonexistant(self): self.assertFalse((b"a" * 40) in self.store) def test_add_objects_empty(self): self.store.add_objects([]) def test_add_commit(self): # TODO: Argh, no way to construct Git commit objects without # access to a serialized form. self.store.add_objects([]) def test_store_resilience(self): """Test if updating an existing stored object doesn't erase the object from the store. 
""" test_object = make_object(Blob, data=b"data") self.store.add_object(test_object) test_object_id = test_object.id test_object.data = test_object.data + b"update" stored_test_object = self.store[test_object_id] self.assertNotEqual(test_object.id, stored_test_object.id) self.assertEqual(stored_test_object.id, test_object_id) def test_add_object(self): self.store.add_object(testobject) self.assertEqual(set([testobject.id]), set(self.store)) self.assertTrue(testobject.id in self.store) r = self.store[testobject.id] self.assertEqual(r, testobject) def test_add_objects(self): data = [(testobject, "mypath")] self.store.add_objects(data) self.assertEqual(set([testobject.id]), set(self.store)) self.assertTrue(testobject.id in self.store) r = self.store[testobject.id] self.assertEqual(r, testobject) def test_tree_changes(self): blob_a1 = make_object(Blob, data=b"a1") blob_a2 = make_object(Blob, data=b"a2") blob_b = make_object(Blob, data=b"b") for blob in [blob_a1, blob_a2, blob_b]: self.store.add_object(blob) blobs_1 = [(b"a", blob_a1.id, 0o100644), (b"b", blob_b.id, 0o100644)] tree1_id = commit_tree(self.store, blobs_1) blobs_2 = [(b"a", blob_a2.id, 0o100644), (b"b", blob_b.id, 0o100644)] tree2_id = commit_tree(self.store, blobs_2) change_a = ( (b"a", b"a"), (0o100644, 0o100644), (blob_a1.id, blob_a2.id), ) self.assertEqual([change_a], list(self.store.tree_changes(tree1_id, tree2_id))) self.assertEqual( [ change_a, ((b"b", b"b"), (0o100644, 0o100644), (blob_b.id, blob_b.id)), ], list(self.store.tree_changes(tree1_id, tree2_id, want_unchanged=True)), ) def test_iter_tree_contents(self): blob_a = make_object(Blob, data=b"a") blob_b = make_object(Blob, data=b"b") blob_c = make_object(Blob, data=b"c") for blob in [blob_a, blob_b, blob_c]: self.store.add_object(blob) blobs = [ (b"a", blob_a.id, 0o100644), (b"ad/b", blob_b.id, 0o100644), (b"ad/bd/c", blob_c.id, 0o100755), (b"ad/c", blob_c.id, 0o100644), (b"c", blob_c.id, 0o100644), ] tree_id = commit_tree(self.store, blobs) self.assertEqual( [TreeEntry(p, m, h) for (p, h, m) in blobs], list(self.store.iter_tree_contents(tree_id)), ) def test_iter_tree_contents_include_trees(self): blob_a = make_object(Blob, data=b"a") blob_b = make_object(Blob, data=b"b") blob_c = make_object(Blob, data=b"c") for blob in [blob_a, blob_b, blob_c]: self.store.add_object(blob) blobs = [ (b"a", blob_a.id, 0o100644), (b"ad/b", blob_b.id, 0o100644), (b"ad/bd/c", blob_c.id, 0o100755), ] tree_id = commit_tree(self.store, blobs) tree = self.store[tree_id] tree_ad = self.store[tree[b"ad"][1]] tree_bd = self.store[tree_ad[b"bd"][1]] expected = [ TreeEntry(b"", 0o040000, tree_id), TreeEntry(b"a", 0o100644, blob_a.id), TreeEntry(b"ad", 0o040000, tree_ad.id), TreeEntry(b"ad/b", 0o100644, blob_b.id), TreeEntry(b"ad/bd", 0o040000, tree_bd.id), TreeEntry(b"ad/bd/c", 0o100755, blob_c.id), ] actual = self.store.iter_tree_contents(tree_id, include_trees=True) self.assertEqual(expected, list(actual)) def make_tag(self, name, obj): tag = make_tag(obj, name=name) self.store.add_object(tag) return tag def test_peel_sha(self): self.store.add_object(testobject) tag1 = self.make_tag(b"1", testobject) tag2 = self.make_tag(b"2", testobject) tag3 = self.make_tag(b"3", testobject) for obj in [testobject, tag1, tag2, tag3]: self.assertEqual(testobject, self.store.peel_sha(obj.id)) def test_get_raw(self): self.store.add_object(testobject) self.assertEqual( (Blob.type_num, b"yummy data"), self.store.get_raw(testobject.id) ) def test_close(self): # For now, just check that close doesn't barf. 
self.store.add_object(testobject) self.store.close() class OverlayObjectStoreTests(ObjectStoreTests, TestCase): def setUp(self): TestCase.setUp(self) self.bases = [MemoryObjectStore(), MemoryObjectStore()] self.store = OverlayObjectStore(self.bases, self.bases[0]) class MemoryObjectStoreTests(ObjectStoreTests, TestCase): def setUp(self): TestCase.setUp(self) self.store = MemoryObjectStore() def test_add_pack(self): o = MemoryObjectStore() f, commit, abort = o.add_pack() try: b = make_object(Blob, data=b"more yummy data") write_pack_objects(f, [(b, None)]) except BaseException: abort() raise else: commit() def test_add_pack_emtpy(self): o = MemoryObjectStore() f, commit, abort = o.add_pack() commit() def test_add_thin_pack(self): o = MemoryObjectStore() blob = make_object(Blob, data=b"yummy data") o.add_object(blob) f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (blob.id, b"more yummy data")), ], store=o, ) o.add_thin_pack(f.read, None) packed_blob_sha = sha_to_hex(entries[0][3]) self.assertEqual( (Blob.type_num, b"more yummy data"), o.get_raw(packed_blob_sha) ) def test_add_thin_pack_empty(self): o = MemoryObjectStore() f = BytesIO() entries = build_pack(f, [], store=o) self.assertEqual([], entries) o.add_thin_pack(f.read, None) class PackBasedObjectStoreTests(ObjectStoreTests): def tearDown(self): for pack in self.store.packs: pack.close() def test_empty_packs(self): self.assertEqual([], list(self.store.packs)) def test_pack_loose_objects(self): b1 = make_object(Blob, data=b"yummy data") self.store.add_object(b1) b2 = make_object(Blob, data=b"more yummy data") self.store.add_object(b2) b3 = make_object(Blob, data=b"even more yummy data") b4 = make_object(Blob, data=b"and more yummy data") self.store.add_objects([(b3, None), (b4, None)]) self.assertEqual({b1.id, b2.id, b3.id, b4.id}, set(self.store)) self.assertEqual(1, len(self.store.packs)) self.assertEqual(2, self.store.pack_loose_objects()) self.assertNotEqual([], list(self.store.packs)) self.assertEqual(0, self.store.pack_loose_objects()) def test_repack(self): b1 = make_object(Blob, data=b"yummy data") self.store.add_object(b1) b2 = make_object(Blob, data=b"more yummy data") self.store.add_object(b2) b3 = make_object(Blob, data=b"even more yummy data") b4 = make_object(Blob, data=b"and more yummy data") self.store.add_objects([(b3, None), (b4, None)]) b5 = make_object(Blob, data=b"and more data") b6 = make_object(Blob, data=b"and some more data") self.store.add_objects([(b5, None), (b6, None)]) self.assertEqual({b1.id, b2.id, b3.id, b4.id, b5.id, b6.id}, set(self.store)) self.assertEqual(2, len(self.store.packs)) self.assertEqual(6, self.store.repack()) self.assertEqual(1, len(self.store.packs)) self.assertEqual(0, self.store.pack_loose_objects()) def test_repack_existing(self): b1 = make_object(Blob, data=b"yummy data") self.store.add_object(b1) b2 = make_object(Blob, data=b"more yummy data") self.store.add_object(b2) self.store.add_objects([(b1, None), (b2, None)]) self.store.add_objects([(b2, None)]) self.assertEqual({b1.id, b2.id}, set(self.store)) self.assertEqual(2, len(self.store.packs)) self.assertEqual(2, self.store.repack()) self.assertEqual(1, len(self.store.packs)) self.assertEqual(0, self.store.pack_loose_objects()) self.assertEqual({b1.id, b2.id}, set(self.store)) self.assertEqual(1, len(self.store.packs)) self.assertEqual(2, self.store.repack()) self.assertEqual(1, len(self.store.packs)) self.assertEqual(0, self.store.pack_loose_objects()) class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase): def 
setUp(self): TestCase.setUp(self) self.store_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, self.store_dir) self.store = DiskObjectStore.init(self.store_dir) def tearDown(self): TestCase.tearDown(self) PackBasedObjectStoreTests.tearDown(self) def test_loose_compression_level(self): alternate_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, alternate_dir) alternate_store = DiskObjectStore(alternate_dir, loose_compression_level=6) b2 = make_object(Blob, data=b"yummy data") alternate_store.add_object(b2) def test_alternates(self): alternate_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, alternate_dir) alternate_store = DiskObjectStore(alternate_dir) b2 = make_object(Blob, data=b"yummy data") alternate_store.add_object(b2) store = DiskObjectStore(self.store_dir) self.assertRaises(KeyError, store.__getitem__, b2.id) store.add_alternate_path(alternate_dir) self.assertIn(b2.id, store) self.assertEqual(b2, store[b2.id]) def test_read_alternate_paths(self): store = DiskObjectStore(self.store_dir) abs_path = os.path.abspath(os.path.normpath("/abspath")) # ensures in particular existence of the alternates file store.add_alternate_path(abs_path) self.assertEqual(set(store._read_alternate_paths()), {abs_path}) store.add_alternate_path("relative-path") self.assertIn( os.path.join(store.path, "relative-path"), set(store._read_alternate_paths()), ) # arguably, add_alternate_path() could strip comments. # Meanwhile it's more convenient to use it than to import INFODIR store.add_alternate_path("# comment") for alt_path in store._read_alternate_paths(): self.assertNotIn("#", alt_path) def test_file_modes(self): self.store.add_object(testobject) path = self.store._get_shafile_path(testobject.id) mode = os.stat(path).st_mode - self.assertEqual(oct(mode), "0o100444") + packmode = "0o100444" if sys.platform != "win32" else "0o100666" + self.assertEqual(oct(mode), packmode) def test_corrupted_object_raise_exception(self): """Corrupted sha1 disk file should raise specific exception""" self.store.add_object(testobject) self.assertEqual( (Blob.type_num, b"yummy data"), self.store.get_raw(testobject.id) ) self.assertTrue(self.store.contains_loose(testobject.id)) self.assertIsNotNone(self.store._get_loose_object(testobject.id)) path = self.store._get_shafile_path(testobject.id) old_mode = os.stat(path).st_mode os.chmod(path, 0o600) with open(path, "wb") as f: # corrupt the file f.write(b"") os.chmod(path, old_mode) expected_error_msg = "Corrupted empty file detected" try: self.store.contains_loose(testobject.id) except EmptyFileException as e: self.assertEqual(str(e), expected_error_msg) try: self.store._get_loose_object(testobject.id) except EmptyFileException as e: self.assertEqual(str(e), expected_error_msg) # this does not change iteration on loose objects though self.assertEqual([testobject.id], list(self.store._iter_loose_objects())) def test_tempfile_in_loose_store(self): self.store.add_object(testobject) self.assertEqual([testobject.id], list(self.store._iter_loose_objects())) # add temporary files to the loose store for i in range(256): dirname = os.path.join(self.store_dir, "%02x" % i) if not os.path.isdir(dirname): os.makedirs(dirname) fd, n = tempfile.mkstemp(prefix="tmp_obj_", dir=dirname) os.close(fd) self.assertEqual([testobject.id], list(self.store._iter_loose_objects())) def test_add_alternate_path(self): store = DiskObjectStore(self.store_dir) self.assertEqual([], list(store._read_alternate_paths())) store.add_alternate_path("/foo/path") self.assertEqual(["/foo/path"], 
list(store._read_alternate_paths())) store.add_alternate_path("/bar/path") self.assertEqual( ["/foo/path", "/bar/path"], list(store._read_alternate_paths()) ) def test_rel_alternative_path(self): alternate_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, alternate_dir) alternate_store = DiskObjectStore(alternate_dir) b2 = make_object(Blob, data=b"yummy data") alternate_store.add_object(b2) store = DiskObjectStore(self.store_dir) self.assertRaises(KeyError, store.__getitem__, b2.id) store.add_alternate_path(os.path.relpath(alternate_dir, self.store_dir)) self.assertEqual(list(alternate_store), list(store.alternates[0])) self.assertIn(b2.id, store) self.assertEqual(b2, store[b2.id]) def test_pack_dir(self): o = DiskObjectStore(self.store_dir) self.assertEqual(os.path.join(self.store_dir, "pack"), o.pack_dir) def test_add_pack(self): o = DiskObjectStore(self.store_dir) f, commit, abort = o.add_pack() try: b = make_object(Blob, data=b"more yummy data") write_pack_objects(f, [(b, None)]) except BaseException: abort() raise else: commit() def test_add_thin_pack(self): o = DiskObjectStore(self.store_dir) try: blob = make_object(Blob, data=b"yummy data") o.add_object(blob) f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (blob.id, b"more yummy data")), ], store=o, ) with o.add_thin_pack(f.read, None) as pack: packed_blob_sha = sha_to_hex(entries[0][3]) pack.check_length_and_checksum() self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack)) self.assertTrue(o.contains_packed(packed_blob_sha)) self.assertTrue(o.contains_packed(blob.id)) self.assertEqual( (Blob.type_num, b"more yummy data"), o.get_raw(packed_blob_sha), ) finally: o.close() def test_add_thin_pack_empty(self): with closing(DiskObjectStore(self.store_dir)) as o: f = BytesIO() entries = build_pack(f, [], store=o) self.assertEqual([], entries) o.add_thin_pack(f.read, None) class TreeLookupPathTests(TestCase): def setUp(self): TestCase.setUp(self) self.store = MemoryObjectStore() blob_a = make_object(Blob, data=b"a") blob_b = make_object(Blob, data=b"b") blob_c = make_object(Blob, data=b"c") for blob in [blob_a, blob_b, blob_c]: self.store.add_object(blob) blobs = [ (b"a", blob_a.id, 0o100644), (b"ad/b", blob_b.id, 0o100644), (b"ad/bd/c", blob_c.id, 0o100755), (b"ad/c", blob_c.id, 0o100644), (b"c", blob_c.id, 0o100644), ] self.tree_id = commit_tree(self.store, blobs) def get_object(self, sha): return self.store[sha] def test_lookup_blob(self): o_id = tree_lookup_path(self.get_object, self.tree_id, b"a")[1] self.assertTrue(isinstance(self.store[o_id], Blob)) def test_lookup_tree(self): o_id = tree_lookup_path(self.get_object, self.tree_id, b"ad")[1] self.assertTrue(isinstance(self.store[o_id], Tree)) o_id = tree_lookup_path(self.get_object, self.tree_id, b"ad/bd")[1] self.assertTrue(isinstance(self.store[o_id], Tree)) o_id = tree_lookup_path(self.get_object, self.tree_id, b"ad/bd/")[1] self.assertTrue(isinstance(self.store[o_id], Tree)) def test_lookup_nonexistent(self): self.assertRaises( KeyError, tree_lookup_path, self.get_object, self.tree_id, b"j" ) def test_lookup_not_tree(self): self.assertRaises( NotTreeError, tree_lookup_path, self.get_object, self.tree_id, b"ad/b/j", ) class ObjectStoreGraphWalkerTests(TestCase): def get_walker(self, heads, parent_map): new_parent_map = dict( [(k * 40, [(p * 40) for p in ps]) for (k, ps) in parent_map.items()] ) return ObjectStoreGraphWalker( [x * 40 for x in heads], new_parent_map.__getitem__ ) def test_ack_invalid_value(self): gw = self.get_walker([], {}) 
self.assertRaises(ValueError, gw.ack, "tooshort") def test_empty(self): gw = self.get_walker([], {}) self.assertIs(None, next(gw)) gw.ack(b"a" * 40) self.assertIs(None, next(gw)) def test_descends(self): gw = self.get_walker([b"a"], {b"a": [b"b"], b"b": []}) self.assertEqual(b"a" * 40, next(gw)) self.assertEqual(b"b" * 40, next(gw)) def test_present(self): gw = self.get_walker([b"a"], {b"a": [b"b"], b"b": []}) gw.ack(b"a" * 40) self.assertIs(None, next(gw)) def test_parent_present(self): gw = self.get_walker([b"a"], {b"a": [b"b"], b"b": []}) self.assertEqual(b"a" * 40, next(gw)) gw.ack(b"a" * 40) self.assertIs(None, next(gw)) def test_child_ack_later(self): gw = self.get_walker([b"a"], {b"a": [b"b"], b"b": [b"c"], b"c": []}) self.assertEqual(b"a" * 40, next(gw)) self.assertEqual(b"b" * 40, next(gw)) gw.ack(b"a" * 40) self.assertIs(None, next(gw)) def test_only_once(self): # a b # | | # c d # \ / # e gw = self.get_walker( [b"a", b"b"], { b"a": [b"c"], b"b": [b"d"], b"c": [b"e"], b"d": [b"e"], b"e": [], }, ) walk = [] acked = False walk.append(next(gw)) walk.append(next(gw)) # A branch (a, c) or (b, d) may be done after 2 steps or 3 depending on # the order walked: 3-step walks include (a, b, c) and (b, a, d), etc. if walk == [b"a" * 40, b"c" * 40] or walk == [b"b" * 40, b"d" * 40]: gw.ack(walk[0]) acked = True walk.append(next(gw)) if not acked and walk[2] == b"c" * 40: gw.ack(b"a" * 40) elif not acked and walk[2] == b"d" * 40: gw.ack(b"b" * 40) walk.append(next(gw)) self.assertIs(None, next(gw)) self.assertEqual([b"a" * 40, b"b" * 40, b"c" * 40, b"d" * 40], sorted(walk)) self.assertLess(walk.index(b"a" * 40), walk.index(b"c" * 40)) self.assertLess(walk.index(b"b" * 40), walk.index(b"d" * 40)) class CommitTreeChangesTests(TestCase): def setUp(self): super(CommitTreeChangesTests, self).setUp() self.store = MemoryObjectStore() self.blob_a = make_object(Blob, data=b"a") self.blob_b = make_object(Blob, data=b"b") self.blob_c = make_object(Blob, data=b"c") for blob in [self.blob_a, self.blob_b, self.blob_c]: self.store.add_object(blob) blobs = [ (b"a", self.blob_a.id, 0o100644), (b"ad/b", self.blob_b.id, 0o100644), (b"ad/bd/c", self.blob_c.id, 0o100755), (b"ad/c", self.blob_c.id, 0o100644), (b"c", self.blob_c.id, 0o100644), ] self.tree_id = commit_tree(self.store, blobs) def test_no_changes(self): self.assertEqual( self.store[self.tree_id], commit_tree_changes(self.store, self.store[self.tree_id], []), ) def test_add_blob(self): blob_d = make_object(Blob, data=b"d") new_tree = commit_tree_changes( self.store, self.store[self.tree_id], [(b"d", 0o100644, blob_d.id)] ) self.assertEqual( new_tree[b"d"], (33188, b"c59d9b6344f1af00e504ba698129f07a34bbed8d"), ) def test_add_blob_in_dir(self): blob_d = make_object(Blob, data=b"d") new_tree = commit_tree_changes( self.store, self.store[self.tree_id], [(b"e/f/d", 0o100644, blob_d.id)], ) self.assertEqual( new_tree.items(), [ TreeEntry(path=b"a", mode=stat.S_IFREG | 0o100644, sha=self.blob_a.id), TreeEntry( path=b"ad", mode=stat.S_IFDIR, sha=b"0e2ce2cd7725ff4817791be31ccd6e627e801f4a", ), TreeEntry(path=b"c", mode=stat.S_IFREG | 0o100644, sha=self.blob_c.id), TreeEntry( path=b"e", mode=stat.S_IFDIR, sha=b"6ab344e288724ac2fb38704728b8896e367ed108", ), ], ) e_tree = self.store[new_tree[b"e"][1]] self.assertEqual( e_tree.items(), [ TreeEntry( path=b"f", mode=stat.S_IFDIR, sha=b"24d2c94d8af232b15a0978c006bf61ef4479a0a5", ) ], ) f_tree = self.store[e_tree[b"f"][1]] self.assertEqual( f_tree.items(), [TreeEntry(path=b"d", mode=stat.S_IFREG | 0o100644, 
sha=blob_d.id)], ) def test_delete_blob(self): new_tree = commit_tree_changes( self.store, self.store[self.tree_id], [(b"ad/bd/c", None, None)] ) self.assertEqual(set(new_tree), {b"a", b"ad", b"c"}) ad_tree = self.store[new_tree[b"ad"][1]] self.assertEqual(set(ad_tree), {b"b", b"c"}) class TestReadPacksFile(TestCase): def test_read_packs(self): self.assertEqual( ["pack-1.pack"], list( read_packs_file( BytesIO( b"""P pack-1.pack """ ) ) ), ) diff --git a/dulwich/tests/test_pack.py b/dulwich/tests/test_pack.py index e866ee7b..278259c3 100644 --- a/dulwich/tests/test_pack.py +++ b/dulwich/tests/test_pack.py @@ -1,1238 +1,1240 @@ # test_pack.py -- Tests for the handling of git packs. # Copyright (C) 2007 James Westby # Copyright (C) 2008 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Tests for Dulwich packs.""" from io import BytesIO from hashlib import sha1 import os import shutil +import sys import tempfile import zlib from dulwich.errors import ( ApplyDeltaError, ChecksumMismatch, ) from dulwich.file import ( GitFile, ) from dulwich.object_store import ( MemoryObjectStore, ) from dulwich.objects import ( hex_to_sha, sha_to_hex, Commit, Tree, Blob, ) from dulwich.pack import ( OFS_DELTA, REF_DELTA, MemoryPackIndex, Pack, PackData, apply_delta, create_delta, deltify_pack_objects, load_pack_index, UnpackedObject, read_zlib_chunks, write_pack_header, write_pack_index_v1, write_pack_index_v2, write_pack_object, write_pack, unpack_object, compute_file_sha, PackStreamReader, DeltaChainIterator, _delta_encode_size, _encode_copy_operation, ) from dulwich.tests import ( TestCase, ) from dulwich.tests.utils import ( make_object, build_pack, ) pack1_sha = b"bc63ddad95e7321ee734ea11a7a62d314e0d7481" a_sha = b"6f670c0fb53f9463760b7295fbb814e965fb20c8" tree_sha = b"b2a2766a2879c209ab1176e7e778b81ae422eeaa" commit_sha = b"f18faa16531ac570a3fdc8c7ca16682548dafd12" +indexmode = "0o100644" if sys.platform != "win32" else "0o100666" class PackTests(TestCase): """Base class for testing packs""" def setUp(self): super(PackTests, self).setUp() self.tempdir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, self.tempdir) datadir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data/packs")) def get_pack_index(self, sha): """Returns a PackIndex from the datadir with the given sha""" return load_pack_index( os.path.join(self.datadir, "pack-%s.idx" % sha.decode("ascii")) ) def get_pack_data(self, sha): """Returns a PackData object from the datadir with the given sha""" return PackData( os.path.join(self.datadir, "pack-%s.pack" % sha.decode("ascii")) ) def get_pack(self, sha): return Pack(os.path.join(self.datadir, "pack-%s" % sha.decode("ascii"))) def assertSucceeds(self, func, *args, **kwargs): try: func(*args, **kwargs) except ChecksumMismatch as e: self.fail(e) class 
PackIndexTests(PackTests): """Class that tests the index of packfiles""" def test_object_index(self): """Tests that the correct object offset is returned from the index.""" p = self.get_pack_index(pack1_sha) self.assertRaises(KeyError, p.object_index, pack1_sha) self.assertEqual(p.object_index(a_sha), 178) self.assertEqual(p.object_index(tree_sha), 138) self.assertEqual(p.object_index(commit_sha), 12) def test_object_sha1(self): """Tests that the correct object sha1 is returned from the index.""" p = self.get_pack_index(pack1_sha) self.assertRaises(KeyError, p.object_sha1, 876) self.assertEqual(p.object_sha1(178), hex_to_sha(a_sha)) self.assertEqual(p.object_sha1(138), hex_to_sha(tree_sha)) self.assertEqual(p.object_sha1(12), hex_to_sha(commit_sha)) def test_index_len(self): p = self.get_pack_index(pack1_sha) self.assertEqual(3, len(p)) def test_get_stored_checksum(self): p = self.get_pack_index(pack1_sha) self.assertEqual( b"f2848e2ad16f329ae1c92e3b95e91888daa5bd01", sha_to_hex(p.get_stored_checksum()), ) self.assertEqual( b"721980e866af9a5f93ad674144e1459b8ba3e7b7", sha_to_hex(p.get_pack_checksum()), ) def test_index_check(self): p = self.get_pack_index(pack1_sha) self.assertSucceeds(p.check) def test_iterentries(self): p = self.get_pack_index(pack1_sha) entries = [(sha_to_hex(s), o, c) for s, o, c in p.iterentries()] self.assertEqual( [ (b"6f670c0fb53f9463760b7295fbb814e965fb20c8", 178, None), (b"b2a2766a2879c209ab1176e7e778b81ae422eeaa", 138, None), (b"f18faa16531ac570a3fdc8c7ca16682548dafd12", 12, None), ], entries, ) def test_iter(self): p = self.get_pack_index(pack1_sha) self.assertEqual(set([tree_sha, commit_sha, a_sha]), set(p)) class TestPackDeltas(TestCase): test_string1 = b"The answer was flailing in the wind" test_string2 = b"The answer was falling down the pipe" test_string3 = b"zzzzz" test_string_empty = b"" test_string_big = b"Z" * 8192 test_string_huge = b"Z" * 100000 def _test_roundtrip(self, base, target): self.assertEqual( target, b"".join(apply_delta(base, create_delta(base, target))) ) def test_nochange(self): self._test_roundtrip(self.test_string1, self.test_string1) def test_nochange_huge(self): self._test_roundtrip(self.test_string_huge, self.test_string_huge) def test_change(self): self._test_roundtrip(self.test_string1, self.test_string2) def test_rewrite(self): self._test_roundtrip(self.test_string1, self.test_string3) def test_empty_to_big(self): self._test_roundtrip(self.test_string_empty, self.test_string_big) def test_empty_to_huge(self): self._test_roundtrip(self.test_string_empty, self.test_string_huge) def test_huge_copy(self): self._test_roundtrip( self.test_string_huge + self.test_string1, self.test_string_huge + self.test_string2, ) def test_dest_overflow(self): self.assertRaises( ApplyDeltaError, apply_delta, b"a" * 0x10000, b"\x80\x80\x04\x80\x80\x04\x80" + b"a" * 0x10000, ) self.assertRaises( ApplyDeltaError, apply_delta, b"", b"\x00\x80\x02\xb0\x11\x11" ) def test_pypy_issue(self): # Test for https://github.com/jelmer/dulwich/issues/509 / # https://bitbucket.org/pypy/pypy/issues/2499/cpyext-pystring_asstring-doesnt-work chunks = [ b"tree 03207ccf58880a748188836155ceed72f03d65d6\n" b"parent 408fbab530fd4abe49249a636a10f10f44d07a21\n" b"author Victor Stinner " b"1421355207 +0100\n" b"committer Victor Stinner " b"1421355207 +0100\n" b"\n" b"Backout changeset 3a06020af8cf\n" b"\nStreamWriter: close() now clears the reference to the " b"transport\n" b"\nStreamWriter now raises an exception if it is closed: " b"write(), writelines(),\n" b"write_eof(), 
can_write_eof(), get_extra_info(), drain().\n" ] delta = [ b"\xcd\x03\xad\x03]tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n" b"parent 20a103cc90135494162e819f98d0edfc1f1fba6b\x91]7\x0510738" b"\x91\x99@\x0b10738 +0100\x93\x04\x01\xc9" ] res = apply_delta(chunks, delta) expected = [ b"tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n" b"parent 20a103cc90135494162e819f98d0edfc1f1fba6b", b"\nauthor Victor Stinner 14213", b"10738", b" +0100\ncommitter Victor Stinner " b"14213", b"10738 +0100", b"\n\nStreamWriter: close() now clears the reference to the " b"transport\n\n" b"StreamWriter now raises an exception if it is closed: " b"write(), writelines(),\n" b"write_eof(), can_write_eof(), get_extra_info(), drain().\n", ] self.assertEqual(b"".join(expected), b"".join(res)) class TestPackData(PackTests): """Tests getting the data from the packfile.""" def test_create_pack(self): self.get_pack_data(pack1_sha).close() def test_from_file(self): path = os.path.join(self.datadir, "pack-%s.pack" % pack1_sha.decode("ascii")) with open(path, "rb") as f: PackData.from_file(f, os.path.getsize(path)) def test_pack_len(self): with self.get_pack_data(pack1_sha) as p: self.assertEqual(3, len(p)) def test_index_check(self): with self.get_pack_data(pack1_sha) as p: self.assertSucceeds(p.check) def test_iterobjects(self): with self.get_pack_data(pack1_sha) as p: commit_data = ( b"tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n" b"author James Westby " b"1174945067 +0100\n" b"committer James Westby " b"1174945067 +0100\n" b"\n" b"Test commit\n" ) blob_sha = b"6f670c0fb53f9463760b7295fbb814e965fb20c8" tree_data = b"100644 a\0" + hex_to_sha(blob_sha) actual = [] for offset, type_num, chunks, crc32 in p.iterobjects(): actual.append((offset, type_num, b"".join(chunks), crc32)) self.assertEqual( [ (12, 1, commit_data, 3775879613), (138, 2, tree_data, 912998690), (178, 3, b"test 1\n", 1373561701), ], actual, ) def test_iterentries(self): with self.get_pack_data(pack1_sha) as p: entries = {(sha_to_hex(s), o, c) for s, o, c in p.iterentries()} self.assertEqual( set( [ ( b"6f670c0fb53f9463760b7295fbb814e965fb20c8", 178, 1373561701, ), ( b"b2a2766a2879c209ab1176e7e778b81ae422eeaa", 138, 912998690, ), ( b"f18faa16531ac570a3fdc8c7ca16682548dafd12", 12, 3775879613, ), ] ), entries, ) def test_create_index_v1(self): with self.get_pack_data(pack1_sha) as p: filename = os.path.join(self.tempdir, "v1test.idx") p.create_index_v1(filename) idx1 = load_pack_index(filename) idx2 = self.get_pack_index(pack1_sha) - self.assertEqual(oct(os.stat(filename).st_mode), "0o100644") + self.assertEqual(oct(os.stat(filename).st_mode), indexmode) self.assertEqual(idx1, idx2) def test_create_index_v2(self): with self.get_pack_data(pack1_sha) as p: filename = os.path.join(self.tempdir, "v2test.idx") p.create_index_v2(filename) idx1 = load_pack_index(filename) idx2 = self.get_pack_index(pack1_sha) - self.assertEqual(oct(os.stat(filename).st_mode), "0o100644") + self.assertEqual(oct(os.stat(filename).st_mode), indexmode) self.assertEqual(idx1, idx2) def test_compute_file_sha(self): f = BytesIO(b"abcd1234wxyz") self.assertEqual( sha1(b"abcd1234wxyz").hexdigest(), compute_file_sha(f).hexdigest() ) self.assertEqual( sha1(b"abcd1234wxyz").hexdigest(), compute_file_sha(f, buffer_size=5).hexdigest(), ) self.assertEqual( sha1(b"abcd1234").hexdigest(), compute_file_sha(f, end_ofs=-4).hexdigest(), ) self.assertEqual( sha1(b"1234wxyz").hexdigest(), compute_file_sha(f, start_ofs=4).hexdigest(), ) self.assertEqual( sha1(b"1234").hexdigest(), compute_file_sha(f, 
start_ofs=4, end_ofs=-4).hexdigest(), ) def test_compute_file_sha_short_file(self): f = BytesIO(b"abcd1234wxyz") self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=-20) self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=20) self.assertRaises( AssertionError, compute_file_sha, f, start_ofs=10, end_ofs=-12 ) class TestPack(PackTests): def test_len(self): with self.get_pack(pack1_sha) as p: self.assertEqual(3, len(p)) def test_contains(self): with self.get_pack(pack1_sha) as p: self.assertTrue(tree_sha in p) def test_get(self): with self.get_pack(pack1_sha) as p: self.assertEqual(type(p[tree_sha]), Tree) def test_iter(self): with self.get_pack(pack1_sha) as p: self.assertEqual(set([tree_sha, commit_sha, a_sha]), set(p)) def test_iterobjects(self): with self.get_pack(pack1_sha) as p: expected = set([p[s] for s in [commit_sha, tree_sha, a_sha]]) self.assertEqual(expected, set(list(p.iterobjects()))) def test_pack_tuples(self): with self.get_pack(pack1_sha) as p: tuples = p.pack_tuples() expected = set([(p[s], None) for s in [commit_sha, tree_sha, a_sha]]) self.assertEqual(expected, set(list(tuples))) self.assertEqual(expected, set(list(tuples))) self.assertEqual(3, len(tuples)) def test_get_object_at(self): """Tests random access for non-delta objects""" with self.get_pack(pack1_sha) as p: obj = p[a_sha] self.assertEqual(obj.type_name, b"blob") self.assertEqual(obj.sha().hexdigest().encode("ascii"), a_sha) obj = p[tree_sha] self.assertEqual(obj.type_name, b"tree") self.assertEqual(obj.sha().hexdigest().encode("ascii"), tree_sha) obj = p[commit_sha] self.assertEqual(obj.type_name, b"commit") self.assertEqual(obj.sha().hexdigest().encode("ascii"), commit_sha) def test_copy(self): with self.get_pack(pack1_sha) as origpack: self.assertSucceeds(origpack.index.check) basename = os.path.join(self.tempdir, "Elch") write_pack(basename, origpack.pack_tuples()) with Pack(basename) as newpack: self.assertEqual(origpack, newpack) self.assertSucceeds(newpack.index.check) self.assertEqual(origpack.name(), newpack.name()) self.assertEqual( origpack.index.get_pack_checksum(), newpack.index.get_pack_checksum(), ) wrong_version = origpack.index.version != newpack.index.version orig_checksum = origpack.index.get_stored_checksum() new_checksum = newpack.index.get_stored_checksum() self.assertTrue(wrong_version or orig_checksum == new_checksum) def test_commit_obj(self): with self.get_pack(pack1_sha) as p: commit = p[commit_sha] self.assertEqual(b"James Westby ", commit.author) self.assertEqual([], commit.parents) def _copy_pack(self, origpack): basename = os.path.join(self.tempdir, "somepack") write_pack(basename, origpack.pack_tuples()) return Pack(basename) def test_keep_no_message(self): with self.get_pack(pack1_sha) as p: p = self._copy_pack(p) with p: keepfile_name = p.keep() # file should exist self.assertTrue(os.path.exists(keepfile_name)) with open(keepfile_name, "r") as f: buf = f.read() self.assertEqual("", buf) def test_keep_message(self): with self.get_pack(pack1_sha) as p: p = self._copy_pack(p) msg = b"some message" with p: keepfile_name = p.keep(msg) # file should exist self.assertTrue(os.path.exists(keepfile_name)) # and contain the right message, with a linefeed with open(keepfile_name, "rb") as f: buf = f.read() self.assertEqual(msg + b"\n", buf) def test_name(self): with self.get_pack(pack1_sha) as p: self.assertEqual(pack1_sha, p.name()) def test_length_mismatch(self): with self.get_pack_data(pack1_sha) as data: index = self.get_pack_index(pack1_sha) 
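# Note (added commentary, not in the original): the pristine pack/index pair is first shown to pass check_length_and_checksum(); the test then rebuilds the pack with a header claiming 9999 objects, which must fail both on lazy access (bad_pack.data) and on the explicit check.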
Pack.from_objects(data, index).check_length_and_checksum() data._file.seek(12) bad_file = BytesIO() write_pack_header(bad_file, 9999) bad_file.write(data._file.read()) bad_file = BytesIO(bad_file.getvalue()) bad_data = PackData("", file=bad_file) bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index) self.assertRaises(AssertionError, lambda: bad_pack.data) self.assertRaises( AssertionError, bad_pack.check_length_and_checksum ) def test_checksum_mismatch(self): with self.get_pack_data(pack1_sha) as data: index = self.get_pack_index(pack1_sha) Pack.from_objects(data, index).check_length_and_checksum() data._file.seek(0) bad_file = BytesIO(data._file.read()[:-20] + (b"\xff" * 20)) bad_data = PackData("", file=bad_file) bad_pack = Pack.from_lazy_objects(lambda: bad_data, lambda: index) self.assertRaises(ChecksumMismatch, lambda: bad_pack.data) self.assertRaises( ChecksumMismatch, bad_pack.check_length_and_checksum ) def test_iterobjects_2(self): with self.get_pack(pack1_sha) as p: objs = {o.id: o for o in p.iterobjects()} self.assertEqual(3, len(objs)) self.assertEqual(sorted(objs), sorted(p.index)) self.assertTrue(isinstance(objs[a_sha], Blob)) self.assertTrue(isinstance(objs[tree_sha], Tree)) self.assertTrue(isinstance(objs[commit_sha], Commit)) class TestThinPack(PackTests): def setUp(self): super(TestThinPack, self).setUp() self.store = MemoryObjectStore() self.blobs = {} for blob in (b"foo", b"bar", b"foo1234", b"bar2468"): self.blobs[blob] = make_object(Blob, data=blob) self.store.add_object(self.blobs[b"foo"]) self.store.add_object(self.blobs[b"bar"]) # Build a thin pack. 'foo' is an external reference, 'bar' an # internal reference. self.pack_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, self.pack_dir) self.pack_prefix = os.path.join(self.pack_dir, "pack") with open(self.pack_prefix + ".pack", "wb") as f: build_pack( f, [ (REF_DELTA, (self.blobs[b"foo"].id, b"foo1234")), (Blob.type_num, b"bar"), (REF_DELTA, (self.blobs[b"bar"].id, b"bar2468")), ], store=self.store, ) # Index the new pack.
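# Note (added commentary, not in the original): indexing happens in a second pass because the freshly written thin pack has no .idx yet; data.pack is pointed at a Pack opened with resolve_ext_ref so that create_index() can fetch the external 'foo' base while resolving the REF_DELTA.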
with self.make_pack(True) as pack: with PackData(pack._data_path) as data: data.pack = pack data.create_index(self.pack_prefix + ".idx") del self.store[self.blobs[b"bar"].id] def make_pack(self, resolve_ext_ref): return Pack( self.pack_prefix, resolve_ext_ref=self.store.get_raw if resolve_ext_ref else None, ) def test_get_raw(self): with self.make_pack(False) as p: self.assertRaises(KeyError, p.get_raw, self.blobs[b"foo1234"].id) with self.make_pack(True) as p: self.assertEqual((3, b"foo1234"), p.get_raw(self.blobs[b"foo1234"].id)) def test_get_raw_unresolved(self): with self.make_pack(False) as p: self.assertEqual( ( 7, b"\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c", [b"x\x9ccf\x9f\xc0\xccbhdl\x02\x00\x06f\x01l"], ), p.get_raw_unresolved(self.blobs[b"foo1234"].id), ) with self.make_pack(True) as p: self.assertEqual( ( 7, b"\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c", [b"x\x9ccf\x9f\xc0\xccbhdl\x02\x00\x06f\x01l"], ), p.get_raw_unresolved(self.blobs[b"foo1234"].id), ) def test_iterobjects(self): with self.make_pack(False) as p: self.assertRaises(KeyError, list, p.iterobjects()) with self.make_pack(True) as p: self.assertEqual( sorted( [ self.blobs[b"foo1234"].id, self.blobs[b"bar"].id, self.blobs[b"bar2468"].id, ] ), sorted(o.id for o in p.iterobjects()), ) class WritePackTests(TestCase): def test_write_pack_header(self): f = BytesIO() write_pack_header(f, 42) self.assertEqual(b"PACK\x00\x00\x00\x02\x00\x00\x00*", f.getvalue()) def test_write_pack_object(self): f = BytesIO() f.write(b"header") offset = f.tell() crc32 = write_pack_object(f, Blob.type_num, b"blob") self.assertEqual(crc32, zlib.crc32(f.getvalue()[6:]) & 0xFFFFFFFF) f.write(b"x") # unpack_object needs extra trailing data. f.seek(offset) unpacked, unused = unpack_object(f.read, compute_crc32=True) self.assertEqual(Blob.type_num, unpacked.pack_type_num) self.assertEqual(Blob.type_num, unpacked.obj_type_num) self.assertEqual([b"blob"], unpacked.decomp_chunks) self.assertEqual(crc32, unpacked.crc32) self.assertEqual(b"x", unused) def test_write_pack_object_sha(self): f = BytesIO() f.write(b"header") offset = f.tell() sha_a = sha1(b"foo") sha_b = sha_a.copy() write_pack_object(f, Blob.type_num, b"blob", sha=sha_a) self.assertNotEqual(sha_a.digest(), sha_b.digest()) sha_b.update(f.getvalue()[offset:]) self.assertEqual(sha_a.digest(), sha_b.digest()) def test_write_pack_object_compression_level(self): f = BytesIO() f.write(b"header") offset = f.tell() sha_a = sha1(b"foo") sha_b = sha_a.copy() write_pack_object(f, Blob.type_num, b"blob", sha=sha_a, compression_level=6) self.assertNotEqual(sha_a.digest(), sha_b.digest()) sha_b.update(f.getvalue()[offset:]) self.assertEqual(sha_a.digest(), sha_b.digest()) pack_checksum = hex_to_sha("721980e866af9a5f93ad674144e1459b8ba3e7b7") class BaseTestPackIndexWriting(object): def assertSucceeds(self, func, *args, **kwargs): try: func(*args, **kwargs) except ChecksumMismatch as e: self.fail(e) def index(self, filename, entries, pack_checksum): raise NotImplementedError(self.index) def test_empty(self): idx = self.index("empty.idx", [], pack_checksum) self.assertEqual(idx.get_pack_checksum(), pack_checksum) self.assertEqual(0, len(idx)) def test_large(self): entry1_sha = hex_to_sha("4e6388232ec39792661e2e75db8fb117fc869ce6") entry2_sha = hex_to_sha("e98f071751bd77f59967bfa671cd2caebdccc9a2") entries = [ (entry1_sha, 0xF2972D0830529B87, 24), (entry2_sha, (~0xF2972D0830529B87) & (2 ** 64 - 1), 92), ] if not self._supports_large: self.assertRaises( TypeError, self.index, 
"single.idx", entries, pack_checksum ) return idx = self.index("single.idx", entries, pack_checksum) self.assertEqual(idx.get_pack_checksum(), pack_checksum) self.assertEqual(2, len(idx)) actual_entries = list(idx.iterentries()) self.assertEqual(len(entries), len(actual_entries)) for mine, actual in zip(entries, actual_entries): my_sha, my_offset, my_crc = mine actual_sha, actual_offset, actual_crc = actual self.assertEqual(my_sha, actual_sha) self.assertEqual(my_offset, actual_offset) if self._has_crc32_checksum: self.assertEqual(my_crc, actual_crc) else: self.assertTrue(actual_crc is None) def test_single(self): entry_sha = hex_to_sha("6f670c0fb53f9463760b7295fbb814e965fb20c8") my_entries = [(entry_sha, 178, 42)] idx = self.index("single.idx", my_entries, pack_checksum) self.assertEqual(idx.get_pack_checksum(), pack_checksum) self.assertEqual(1, len(idx)) actual_entries = list(idx.iterentries()) self.assertEqual(len(my_entries), len(actual_entries)) for mine, actual in zip(my_entries, actual_entries): my_sha, my_offset, my_crc = mine actual_sha, actual_offset, actual_crc = actual self.assertEqual(my_sha, actual_sha) self.assertEqual(my_offset, actual_offset) if self._has_crc32_checksum: self.assertEqual(my_crc, actual_crc) else: self.assertTrue(actual_crc is None) class BaseTestFilePackIndexWriting(BaseTestPackIndexWriting): def setUp(self): self.tempdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tempdir) def index(self, filename, entries, pack_checksum): path = os.path.join(self.tempdir, filename) self.writeIndex(path, entries, pack_checksum) idx = load_pack_index(path) self.assertSucceeds(idx.check) self.assertEqual(idx.version, self._expected_version) return idx def writeIndex(self, filename, entries, pack_checksum): # FIXME: Write to BytesIO instead rather than hitting disk ? with GitFile(filename, "wb") as f: self._write_fn(f, entries, pack_checksum) class TestMemoryIndexWriting(TestCase, BaseTestPackIndexWriting): def setUp(self): TestCase.setUp(self) self._has_crc32_checksum = True self._supports_large = True def index(self, filename, entries, pack_checksum): return MemoryPackIndex(entries, pack_checksum) def tearDown(self): TestCase.tearDown(self) class TestPackIndexWritingv1(TestCase, BaseTestFilePackIndexWriting): def setUp(self): TestCase.setUp(self) BaseTestFilePackIndexWriting.setUp(self) self._has_crc32_checksum = False self._expected_version = 1 self._supports_large = False self._write_fn = write_pack_index_v1 def tearDown(self): TestCase.tearDown(self) BaseTestFilePackIndexWriting.tearDown(self) class TestPackIndexWritingv2(TestCase, BaseTestFilePackIndexWriting): def setUp(self): TestCase.setUp(self) BaseTestFilePackIndexWriting.setUp(self) self._has_crc32_checksum = True self._supports_large = True self._expected_version = 2 self._write_fn = write_pack_index_v2 def tearDown(self): TestCase.tearDown(self) BaseTestFilePackIndexWriting.tearDown(self) class ReadZlibTests(TestCase): decomp = ( b"tree 4ada885c9196b6b6fa08744b5862bf92896fc002\n" b"parent None\n" b"author Jelmer Vernooij 1228980214 +0000\n" b"committer Jelmer Vernooij 1228980214 +0000\n" b"\n" b"Provide replacement for mmap()'s offset argument." 
) comp = zlib.compress(decomp) extra = b"nextobject" def setUp(self): super(ReadZlibTests, self).setUp() self.read = BytesIO(self.comp + self.extra).read self.unpacked = UnpackedObject(Tree.type_num, None, len(self.decomp), 0) def test_decompress_size(self): good_decomp_len = len(self.decomp) self.unpacked.decomp_len = -1 self.assertRaises(ValueError, read_zlib_chunks, self.read, self.unpacked) self.unpacked.decomp_len = good_decomp_len - 1 self.assertRaises(zlib.error, read_zlib_chunks, self.read, self.unpacked) self.unpacked.decomp_len = good_decomp_len + 1 self.assertRaises(zlib.error, read_zlib_chunks, self.read, self.unpacked) def test_decompress_truncated(self): read = BytesIO(self.comp[:10]).read self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked) read = BytesIO(self.comp).read self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked) def test_decompress_empty(self): unpacked = UnpackedObject(Tree.type_num, None, 0, None) comp = zlib.compress(b"") read = BytesIO(comp + self.extra).read unused = read_zlib_chunks(read, unpacked) self.assertEqual(b"", b"".join(unpacked.decomp_chunks)) self.assertNotEqual(b"", unused) self.assertEqual(self.extra, unused + read()) def test_decompress_no_crc32(self): self.unpacked.crc32 = None read_zlib_chunks(self.read, self.unpacked) self.assertEqual(None, self.unpacked.crc32) def _do_decompress_test(self, buffer_size, **kwargs): unused = read_zlib_chunks( self.read, self.unpacked, buffer_size=buffer_size, **kwargs ) self.assertEqual(self.decomp, b"".join(self.unpacked.decomp_chunks)) self.assertEqual(zlib.crc32(self.comp), self.unpacked.crc32) self.assertNotEqual(b"", unused) self.assertEqual(self.extra, unused + self.read()) def test_simple_decompress(self): self._do_decompress_test(4096) self.assertEqual(None, self.unpacked.comp_chunks) # These tiny buffer sizes are not meant to be realistic; they force each read to end at a different point in the stream, standing in for the way larger buffers may end at arbitrary places.
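# Added illustration (best-effort sketch, not part of the original suite):
# with buffer_size=1 every underlying read() returns a single byte, so
# read_zlib_chunks must repeatedly resume the decompressor and finally
# returns whatever it over-read past the end of the zlib stream:
#
#     unpacked = UnpackedObject(Tree.type_num, None, len(decomp), 0)
#     unused = read_zlib_chunks(BytesIO(comp + extra).read, unpacked, buffer_size=1)
#     assert b"".join(unpacked.decomp_chunks) == decomp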
def test_decompress_buffer_size_1(self): self._do_decompress_test(1) def test_decompress_buffer_size_2(self): self._do_decompress_test(2) def test_decompress_buffer_size_3(self): self._do_decompress_test(3) def test_decompress_buffer_size_4(self): self._do_decompress_test(4) def test_decompress_include_comp(self): self._do_decompress_test(4096, include_comp=True) self.assertEqual(self.comp, b"".join(self.unpacked.comp_chunks)) class DeltifyTests(TestCase): def test_empty(self): self.assertEqual([], list(deltify_pack_objects([]))) def test_single(self): b = Blob.from_string(b"foo") self.assertEqual( [(b.type_num, b.sha().digest(), None, b.as_raw_string())], list(deltify_pack_objects([(b, b"")])), ) def test_simple_delta(self): b1 = Blob.from_string(b"a" * 101) b2 = Blob.from_string(b"a" * 100) delta = create_delta(b1.as_raw_string(), b2.as_raw_string()) self.assertEqual( [ (b1.type_num, b1.sha().digest(), None, b1.as_raw_string()), (b2.type_num, b2.sha().digest(), b1.sha().digest(), delta), ], list(deltify_pack_objects([(b1, b""), (b2, b"")])), ) class TestPackStreamReader(TestCase): def test_read_objects_empty_pack(self): f = BytesIO() build_pack(f, []) reader = PackStreamReader(f.read) self.assertEqual(0, len(list(reader.read_objects()))) def test_read_objects(self): f = BytesIO() entries = build_pack( f, [ (Blob.type_num, b"blob"), (OFS_DELTA, (0, b"blob1")), ], ) reader = PackStreamReader(f.read) objects = list(reader.read_objects(compute_crc32=True)) self.assertEqual(2, len(objects)) unpacked_blob, unpacked_delta = objects self.assertEqual(entries[0][0], unpacked_blob.offset) self.assertEqual(Blob.type_num, unpacked_blob.pack_type_num) self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num) self.assertEqual(None, unpacked_blob.delta_base) self.assertEqual(b"blob", b"".join(unpacked_blob.decomp_chunks)) self.assertEqual(entries[0][4], unpacked_blob.crc32) self.assertEqual(entries[1][0], unpacked_delta.offset) self.assertEqual(OFS_DELTA, unpacked_delta.pack_type_num) self.assertEqual(None, unpacked_delta.obj_type_num) self.assertEqual( unpacked_delta.offset - unpacked_blob.offset, unpacked_delta.delta_base, ) delta = create_delta(b"blob", b"blob1") self.assertEqual(delta, b"".join(unpacked_delta.decomp_chunks)) self.assertEqual(entries[1][4], unpacked_delta.crc32) def test_read_objects_buffered(self): f = BytesIO() build_pack( f, [ (Blob.type_num, b"blob"), (OFS_DELTA, (0, b"blob1")), ], ) reader = PackStreamReader(f.read, zlib_bufsize=4) self.assertEqual(2, len(list(reader.read_objects()))) def test_read_objects_empty(self): reader = PackStreamReader(BytesIO().read) self.assertEqual([], list(reader.read_objects())) class TestPackIterator(DeltaChainIterator): _compute_crc32 = True def __init__(self, *args, **kwargs): super(TestPackIterator, self).__init__(*args, **kwargs) self._unpacked_offsets = set() def _result(self, unpacked): """Return entries in the same format as build_pack.""" return ( unpacked.offset, unpacked.obj_type_num, b"".join(unpacked.obj_chunks), unpacked.sha(), unpacked.crc32, ) def _resolve_object(self, offset, pack_type_num, base_chunks): assert offset not in self._unpacked_offsets, ( "Attempted to re-inflate offset %i" % offset ) self._unpacked_offsets.add(offset) return super(TestPackIterator, self)._resolve_object( offset, pack_type_num, base_chunks ) class DeltaChainIteratorTests(TestCase): def setUp(self): super(DeltaChainIteratorTests, self).setUp() self.store = MemoryObjectStore() self.fetched = set() def store_blobs(self, blobs_data): blobs = [] for data in
blobs_data: blob = make_object(Blob, data=data) blobs.append(blob) self.store.add_object(blob) return blobs def get_raw_no_repeat(self, bin_sha): """Wrapper around store.get_raw that doesn't allow repeat lookups.""" hex_sha = sha_to_hex(bin_sha) self.assertFalse( hex_sha in self.fetched, "Attempted to re-fetch object %s" % hex_sha, ) self.fetched.add(hex_sha) return self.store.get_raw(hex_sha) def make_pack_iter(self, f, thin=None): if thin is None: thin = bool(list(self.store)) resolve_ext_ref = thin and self.get_raw_no_repeat or None data = PackData("test.pack", file=f) return TestPackIterator.for_pack_data(data, resolve_ext_ref=resolve_ext_ref) def assertEntriesMatch(self, expected_indexes, entries, pack_iter): expected = [entries[i] for i in expected_indexes] self.assertEqual(expected, list(pack_iter._walk_all_chains())) def test_no_deltas(self): f = BytesIO() entries = build_pack( f, [ (Commit.type_num, b"commit"), (Blob.type_num, b"blob"), (Tree.type_num, b"tree"), ], ) self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f)) def test_ofs_deltas(self): f = BytesIO() entries = build_pack( f, [ (Blob.type_num, b"blob"), (OFS_DELTA, (0, b"blob1")), (OFS_DELTA, (0, b"blob2")), ], ) self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f)) def test_ofs_deltas_chain(self): f = BytesIO() entries = build_pack( f, [ (Blob.type_num, b"blob"), (OFS_DELTA, (0, b"blob1")), (OFS_DELTA, (1, b"blob2")), ], ) self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f)) def test_ref_deltas(self): f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (1, b"blob1")), (Blob.type_num, (b"blob")), (REF_DELTA, (1, b"blob2")), ], ) self.assertEntriesMatch([1, 0, 2], entries, self.make_pack_iter(f)) def test_ref_deltas_chain(self): f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (2, b"blob1")), (Blob.type_num, (b"blob")), (REF_DELTA, (1, b"blob2")), ], ) self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f)) def test_ofs_and_ref_deltas(self): # Deltas pending on this offset are popped before deltas depending on # this ref. 
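# Note (added commentary, not in the original): concretely, entry 1 is the full blob, entry 2 is an OFS_DELTA child keyed by entry 1's offset, and entry 0 is a REF_DELTA child keyed by its sha; offset-pending children are popped first, hence the expected order [1, 2, 0] below.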
f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (1, b"blob1")), (Blob.type_num, (b"blob")), (OFS_DELTA, (1, b"blob2")), ], ) self.assertEntriesMatch([1, 2, 0], entries, self.make_pack_iter(f)) def test_mixed_chain(self): f = BytesIO() entries = build_pack( f, [ (Blob.type_num, b"blob"), (REF_DELTA, (2, b"blob2")), (OFS_DELTA, (0, b"blob1")), (OFS_DELTA, (1, b"blob3")), (OFS_DELTA, (0, b"bob")), ], ) self.assertEntriesMatch([0, 2, 4, 1, 3], entries, self.make_pack_iter(f)) def test_long_chain(self): n = 100 objects_spec = [(Blob.type_num, b"blob")] for i in range(n): objects_spec.append((OFS_DELTA, (i, b"blob" + str(i).encode("ascii")))) f = BytesIO() entries = build_pack(f, objects_spec) self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f)) def test_branchy_chain(self): n = 100 objects_spec = [(Blob.type_num, b"blob")] for i in range(n): objects_spec.append((OFS_DELTA, (0, b"blob" + str(i).encode("ascii")))) f = BytesIO() entries = build_pack(f, objects_spec) self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f)) def test_ext_ref(self): (blob,) = self.store_blobs([b"blob"]) f = BytesIO() entries = build_pack(f, [(REF_DELTA, (blob.id, b"blob1"))], store=self.store) pack_iter = self.make_pack_iter(f) self.assertEntriesMatch([0], entries, pack_iter) self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs()) def test_ext_ref_chain(self): (blob,) = self.store_blobs([b"blob"]) f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (1, b"blob2")), (REF_DELTA, (blob.id, b"blob1")), ], store=self.store, ) pack_iter = self.make_pack_iter(f) self.assertEntriesMatch([1, 0], entries, pack_iter) self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs()) def test_ext_ref_chain_degenerate(self): # Test a degenerate case where the sender is sending a REF_DELTA # object that expands to an object already in the repository. 
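# Note (added commentary, not in the original): the REF_DELTA against 'blob' expands to exactly b"blob2", whose object already exists in the store; the iterator should still emit it rather than skip it, and only 'blob' should be reported as an external ref.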
(blob,) = self.store_blobs([b"blob"]) (blob2,) = self.store_blobs([b"blob2"]) assert blob.id < blob2.id f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (blob.id, b"blob2")), (REF_DELTA, (0, b"blob3")), ], store=self.store, ) pack_iter = self.make_pack_iter(f) self.assertEntriesMatch([0, 1], entries, pack_iter) self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs()) def test_ext_ref_multiple_times(self): (blob,) = self.store_blobs([b"blob"]) f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (blob.id, b"blob1")), (REF_DELTA, (blob.id, b"blob2")), ], store=self.store, ) pack_iter = self.make_pack_iter(f) self.assertEntriesMatch([0, 1], entries, pack_iter) self.assertEqual([hex_to_sha(blob.id)], pack_iter.ext_refs()) def test_multiple_ext_refs(self): b1, b2 = self.store_blobs([b"foo", b"bar"]) f = BytesIO() entries = build_pack( f, [ (REF_DELTA, (b1.id, b"foo1")), (REF_DELTA, (b2.id, b"bar2")), ], store=self.store, ) pack_iter = self.make_pack_iter(f) self.assertEntriesMatch([0, 1], entries, pack_iter) self.assertEqual([hex_to_sha(b1.id), hex_to_sha(b2.id)], pack_iter.ext_refs()) def test_bad_ext_ref_non_thin_pack(self): (blob,) = self.store_blobs([b"blob"]) f = BytesIO() build_pack(f, [(REF_DELTA, (blob.id, b"blob1"))], store=self.store) pack_iter = self.make_pack_iter(f, thin=False) try: list(pack_iter._walk_all_chains()) self.fail() except KeyError as e: self.assertEqual(([blob.id],), e.args) def test_bad_ext_ref_thin_pack(self): b1, b2, b3 = self.store_blobs([b"foo", b"bar", b"baz"]) f = BytesIO() build_pack( f, [ (REF_DELTA, (1, b"foo99")), (REF_DELTA, (b1.id, b"foo1")), (REF_DELTA, (b2.id, b"bar2")), (REF_DELTA, (b3.id, b"baz3")), ], store=self.store, ) del self.store[b2.id] del self.store[b3.id] pack_iter = self.make_pack_iter(f) try: list(pack_iter._walk_all_chains()) self.fail() except KeyError as e: self.assertEqual((sorted([b2.id, b3.id]),), (sorted(e.args[0]),)) class DeltaEncodeSizeTests(TestCase): def test_basic(self): self.assertEqual(b"\x00", _delta_encode_size(0)) self.assertEqual(b"\x01", _delta_encode_size(1)) self.assertEqual(b"\xfa\x01", _delta_encode_size(250)) self.assertEqual(b"\xe8\x07", _delta_encode_size(1000)) self.assertEqual(b"\xa0\x8d\x06", _delta_encode_size(100000)) class EncodeCopyOperationTests(TestCase): def test_basic(self): self.assertEqual(b"\x80", _encode_copy_operation(0, 0)) self.assertEqual(b"\x91\x01\x0a", _encode_copy_operation(1, 10)) self.assertEqual(b"\xb1\x64\xe8\x03", _encode_copy_operation(100, 1000)) self.assertEqual(b"\x93\xe8\x03\x01", _encode_copy_operation(1000, 1)) diff --git a/dulwich/tests/utils.py b/dulwich/tests/utils.py index b547667b..09f92cb3 100644 --- a/dulwich/tests/utils.py +++ b/dulwich/tests/utils.py @@ -1,388 +1,378 @@ # utils.py -- Test utilities for Dulwich. # Copyright (C) 2010 Google, Inc. # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Utility functions common to Dulwich tests.""" import datetime import os import shutil -import stat import tempfile import time import types import warnings from dulwich.index import ( commit_tree, ) from dulwich.objects import ( FixedSha, Commit, Tag, object_class, ) from dulwich.pack import ( OFS_DELTA, REF_DELTA, DELTA_TYPES, obj_sha, SHA1Writer, write_pack_header, write_pack_object, create_delta, ) from dulwich.repo import Repo from dulwich.tests import ( # noqa: F401 skipIf, SkipTest, ) # Plain files are very frequently used in tests, so let the mode be very short. F = 0o100644 # Shorthand mode for Files. def open_repo(name, temp_dir=None): """Open a copy of a repo in a temporary directory. Use this function for accessing repos in dulwich/tests/data/repos to avoid accidentally or intentionally modifying those repos in place. Use tear_down_repo to delete any temp files created. Args: name: The name of the repository, relative to dulwich/tests/data/repos temp_dir: temporary directory to initialize to. If not provided, a temporary directory will be created. Returns: An initialized Repo object that lives in a temporary directory. """ if temp_dir is None: temp_dir = tempfile.mkdtemp() repo_dir = os.path.join(os.path.dirname(__file__), "data", "repos", name) temp_repo_dir = os.path.join(temp_dir, name) shutil.copytree(repo_dir, temp_repo_dir, symlinks=True) return Repo(temp_repo_dir) -def safe_rmtree(path): - """Version of shutil.rmtree() that handles unwritable files""" - def really_delete(action, name, exc): - os.chmod(name, stat.S_IWRITE) - os.remove(name) - - shutil.rmtree(path, onerror=really_delete) - - def tear_down_repo(repo): """Tear down a test repository.""" repo.close() temp_dir = os.path.dirname(repo.path.rstrip(os.sep)) - safe_rmtree(temp_dir) + shutil.rmtree(temp_dir) def make_object(cls, **attrs): """Make an object for testing and assign some members. This method creates a new subclass to allow arbitrary attribute reassignment, which is not otherwise possible with objects having __slots__. Args: attrs: dict of attributes to set on the new object. Returns: A newly initialized object of type cls. """ class TestObject(cls): """Class that inherits from the given class, but without __slots__. Note that classes with __slots__ can't have arbitrary attributes monkey-patched in, so this is a class that is exactly the same only with a __dict__ instead of __slots__. """ pass TestObject.__name__ = "TestObject_" + cls.__name__ obj = TestObject() for name, value in attrs.items(): if name == "id": # id property is read-only, so we overwrite sha instead. sha = FixedSha(value) obj.sha = lambda: sha else: setattr(obj, name, value) return obj def make_commit(**attrs): """Make a Commit object with a default set of members. Args: attrs: dict of attributes to overwrite from the default values. Returns: A newly initialized Commit object. """ default_time = 1262304000 # 2010-01-01 00:00:00 all_attrs = { "author": b"Test Author ", "author_time": default_time, "author_timezone": 0, "committer": b"Test Committer ", "commit_time": default_time, "commit_timezone": 0, "message": b"Test message.", "parents": [], "tree": b"0" * 40, } all_attrs.update(attrs) return make_object(Commit, **all_attrs) def make_tag(target, **attrs): """Make a Tag object with a default set of values. 
Args: target: object to be tagged (Commit, Blob, Tree, etc) attrs: dict of attributes to overwrite from the default values. Returns: A newly initialized Tag object. """ target_id = target.id target_type = object_class(target.type_name) default_time = int(time.mktime(datetime.datetime(2010, 1, 1).timetuple())) all_attrs = { "tagger": b"Test Author ", "tag_time": default_time, "tag_timezone": 0, "message": b"Test message.", "object": (target_type, target_id), "name": b"Test Tag", } all_attrs.update(attrs) return make_object(Tag, **all_attrs) def functest_builder(method, func): """Generate a test method that tests the given function.""" def do_test(self): method(self, func) return do_test def ext_functest_builder(method, func): """Generate a test method that tests the given extension function. This is intended to generate test methods that test both a pure-Python version and an extension version using common test code. The extension test will raise SkipTest if the extension is not found. Sample usage: class MyTest(TestCase): def _do_some_test(self, func_impl): self.assertEqual('foo', func_impl()) test_foo = functest_builder(_do_some_test, foo_py) test_foo_extension = ext_functest_builder(_do_some_test, _foo_c) Args: method: The method to run. It must take two parameters, self and the function implementation to test. func: The function implementation to pass to method. """ def do_test(self): if not isinstance(func, types.BuiltinFunctionType): raise SkipTest("%s extension not found" % func) method(self, func) return do_test def build_pack(f, objects_spec, store=None): """Write test pack data from a concise spec. Args: f: A file-like object to write the pack to. objects_spec: A list of (type_num, obj). For non-delta types, obj is the string of that object's data. For delta types, obj is a tuple of (base, data), where: * base can be either an index in objects_spec of the base for that delta; or for a ref delta, a SHA, in which case the resulting pack will be thin and the base will be an external ref. * data is a string of the full, non-deltified data for that object. Note that offsets/refs and deltas are computed within this function. store: An optional ObjectStore for looking up external refs. 
Returns: A list of tuples in the order specified by objects_spec: (offset, type num, data, sha, CRC32) """ sf = SHA1Writer(f) num_objects = len(objects_spec) write_pack_header(sf, num_objects) full_objects = {} offsets = {} crc32s = {} while len(full_objects) < num_objects: for i, (type_num, data) in enumerate(objects_spec): if type_num not in DELTA_TYPES: full_objects[i] = (type_num, data, obj_sha(type_num, [data])) continue base, data = data if isinstance(base, int): if base not in full_objects: continue base_type_num, _, _ = full_objects[base] else: base_type_num, _ = store.get_raw(base) full_objects[i] = ( base_type_num, data, obj_sha(base_type_num, [data]), ) for i, (type_num, obj) in enumerate(objects_spec): offset = f.tell() if type_num == OFS_DELTA: base_index, data = obj base = offset - offsets[base_index] _, base_data, _ = full_objects[base_index] obj = (base, create_delta(base_data, data)) elif type_num == REF_DELTA: base_ref, data = obj if isinstance(base_ref, int): _, base_data, base = full_objects[base_ref] else: base_type_num, base_data = store.get_raw(base_ref) base = obj_sha(base_type_num, base_data) obj = (base, create_delta(base_data, data)) crc32 = write_pack_object(sf, type_num, obj) offsets[i] = offset crc32s[i] = crc32 expected = [] for i in range(num_objects): type_num, data, sha = full_objects[i] assert len(sha) == 20 expected.append((offsets[i], type_num, data, sha, crc32s[i])) sf.write_sha() f.seek(0) return expected def build_commit_graph(object_store, commit_spec, trees=None, attrs=None): """Build a commit graph from a concise specification. Sample usage: >>> c1, c2, c3 = build_commit_graph(store, [[1], [2, 1], [3, 1, 2]]) >>> store[store[c3].parents[0]] == c1 True >>> store[store[c3].parents[1]] == c2 True If not otherwise specified, commits will refer to the empty tree and have commit times increasing in the same order as the commit spec. Args: object_store: An ObjectStore to commit objects to. commit_spec: An iterable of iterables of ints defining the commit graph. Each entry defines one commit, and entries must be in topological order. The first element of each entry is a commit number, and the remaining elements are its parents. The commit numbers are only meaningful for the call to make_commits; since real commit objects are created, they will get created with real, opaque SHAs. trees: An optional dict of commit number -> tree spec for building trees for commits. The tree spec is an iterable of (path, blob, mode) or (path, blob) entries; if mode is omitted, it defaults to the normal file mode (0100644). attrs: A dict of commit number -> (dict of attribute -> value) for assigning additional values to the commits. Returns: The list of commit objects created. Raises: ValueError: If an undefined commit identifier is listed as a parent. 
""" if trees is None: trees = {} if attrs is None: attrs = {} commit_time = 0 nums = {} commits = [] for commit in commit_spec: commit_num = commit[0] try: parent_ids = [nums[pn] for pn in commit[1:]] except KeyError as e: (missing_parent,) = e.args raise ValueError("Unknown parent %i" % missing_parent) blobs = [] for entry in trees.get(commit_num, []): if len(entry) == 2: path, blob = entry entry = (path, blob, F) path, blob, mode = entry blobs.append((path, blob.id, mode)) object_store.add_object(blob) tree_id = commit_tree(object_store, blobs) commit_attrs = { "message": ("Commit %i" % commit_num).encode("ascii"), "parents": parent_ids, "tree": tree_id, "commit_time": commit_time, } commit_attrs.update(attrs.get(commit_num, {})) commit_obj = make_commit(**commit_attrs) # By default, increment the time by a lot. Out-of-order commits should # be closer together than this because their main cause is clock skew. commit_time = commit_attrs["commit_time"] + 100 nums[commit_num] = commit_obj.id object_store.add_object(commit_obj) commits.append(commit_obj) return commits def setup_warning_catcher(): """Wrap warnings.showwarning with code that records warnings.""" caught_warnings = [] original_showwarning = warnings.showwarning def custom_showwarning(*args, **kwargs): caught_warnings.append(args[0]) warnings.showwarning = custom_showwarning def restore_showwarning(): warnings.showwarning = original_showwarning return caught_warnings, restore_showwarning