diff --git a/dulwich/index.py b/dulwich/index.py index 4b464bde..3afb4363 100644 --- a/dulwich/index.py +++ b/dulwich/index.py @@ -1,990 +1,988 @@ # index.py -- File parser/writer for the git index file # Copyright (C) 2008-2013 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Parser for the git index file format.""" import collections import os import stat import struct import sys from typing import ( Any, BinaryIO, Callable, Dict, List, Optional, TYPE_CHECKING, Iterable, Iterator, Tuple, ) if TYPE_CHECKING: from dulwich.object_store import BaseObjectStore from dulwich.file import GitFile from dulwich.objects import ( Blob, S_IFGITLINK, S_ISGITLINK, Tree, hex_to_sha, sha_to_hex, ) from dulwich.pack import ( SHA1Reader, SHA1Writer, ) # TODO(jelmer): Switch to dataclass? IndexEntry = collections.namedtuple( "IndexEntry", [ "ctime", "mtime", "dev", "ino", "mode", "uid", "gid", "size", "sha", "flags", "extended_flags", ], ) # 2-bit stage (during merge) FLAG_STAGEMASK = 0x3000 # assume-valid FLAG_VALID = 0x8000 # extended flag (must be zero in version 2) FLAG_EXTENDED = 0x4000 # used by sparse checkout EXTENDED_FLAG_SKIP_WORKTREE = 0x4000 # used by "git add -N" EXTENDED_FLAG_INTEND_TO_ADD = 0x2000 DEFAULT_VERSION = 2 def pathsplit(path): """Split a /-delimited path into a directory part and a basename. Args: path: The path to split. Returns: Tuple with directory name and basename """ try: (dirname, basename) = path.rsplit(b"/", 1) except ValueError: return (b"", path) else: return (dirname, basename) def pathjoin(*args): """Join a /-delimited path.""" return b"/".join([p for p in args if p]) def read_cache_time(f): """Read a cache time. Args: f: File-like object to read from Returns: Tuple with seconds and nanoseconds """ return struct.unpack(">LL", f.read(8)) def write_cache_time(f, t): """Write a cache time. Args: f: File-like object to write to t: Time to write (as int, float or tuple with secs and nsecs) """ if isinstance(t, int): t = (t, 0) elif isinstance(t, float): (secs, nsecs) = divmod(t, 1.0) t = (int(secs), int(nsecs * 1000000000)) elif not isinstance(t, tuple): raise TypeError(t) f.write(struct.pack(">LL", *t)) def read_cache_entry(f, version: int) -> Tuple[str, IndexEntry]: """Read an entry from a cache file. 
Args: f: File-like object to read from Returns: tuple with: name, IndexEntry """ beginoffset = f.tell() ctime = read_cache_time(f) mtime = read_cache_time(f) ( dev, ino, mode, uid, gid, size, sha, flags, ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2)) if flags & FLAG_EXTENDED: if version < 3: raise AssertionError( 'extended flag set in index with version < 3') extended_flags = struct.unpack(">H", f.read(2)) else: extended_flags = 0 name = f.read((flags & 0x0FFF)) # Padding: real_size = (f.tell() - beginoffset + 8) & ~7 f.read((beginoffset + real_size) - f.tell()) return ( name, IndexEntry( ctime, mtime, dev, ino, mode, uid, gid, size, sha_to_hex(sha), flags & ~0x0FFF, extended_flags, )) def write_cache_entry(f, name, entry, version=None): """Write an index entry to a file. Args: f: File object entry: IndexEntry to write, tuple with: """ beginoffset = f.tell() write_cache_time(f, entry.ctime) write_cache_time(f, entry.mtime) flags = len(name) | (entry.flags & ~0x0FFF) if entry.extended_flags: flags |= FLAG_EXTENDED if flags & FLAG_EXTENDED and version is not None and version < 3: raise AssertionError('unable to use extended flags in version < 3') f.write( struct.pack( b">LLLLLL20sH", entry.dev & 0xFFFFFFFF, entry.ino & 0xFFFFFFFF, entry.mode, entry.uid, entry.gid, entry.size, hex_to_sha(entry.sha), flags, ) ) if flags & FLAG_EXTENDED: f.write(struct.pack(b">H", entry.extended_flags)) f.write(name) real_size = (f.tell() - beginoffset + 8) & ~7 f.write(b"\0" * ((beginoffset + real_size) - f.tell())) def read_index(f: BinaryIO): """Read an index file, yielding the individual entries.""" header = f.read(4) if header != b"DIRC": raise AssertionError("Invalid index file header: %r" % header) (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2)) assert version in (1, 2, 3), "index version is %r" % version for i in range(num_entries): yield read_cache_entry(f, version) def read_index_dict(f): """Read an index file and return it as a dictionary. Args: f: File object to read from """ ret = {} for name, entry in read_index(f): ret[name] = entry return ret def write_index(f: BinaryIO, entries: Iterable[Tuple[bytes, IndexEntry]], version: Optional[int] = None): """Write an index file. Args: f: File-like object to write to version: Version number to write entries: Iterable over the entries to write """ if version is None: version = DEFAULT_VERSION f.write(b"DIRC") f.write(struct.pack(b">LL", version, len(entries))) for name, entry in entries: write_cache_entry(f, name, entry, version) def write_index_dict( f: BinaryIO, entries: Dict[bytes, IndexEntry], version: Optional[int] = None, ) -> None: """Write an index file based on the contents of a dictionary.""" entries_list = [] for name in sorted(entries): entries_list.append((name, entries[name])) write_index(f, entries_list, version=version) def cleanup_mode(mode: int) -> int: """Cleanup a mode value. This will return a mode that can be stored in a tree object. Args: mode: Mode to clean up. Returns: mode """ if stat.S_ISLNK(mode): return stat.S_IFLNK elif stat.S_ISDIR(mode): return stat.S_IFDIR elif S_ISGITLINK(mode): return S_IFGITLINK ret = stat.S_IFREG | 0o644 if mode & 0o100: ret |= 0o111 return ret class Index(object): """A Git Index file.""" def __init__(self, filename): """Open an index file. 
Args: filename: Path to the index file """ self._filename = filename # TODO(jelmer): Store the version returned by read_index self._version = None self.clear() self.read() @property def path(self): return self._filename def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._filename) def write(self) -> None: """Write current contents of index to disk.""" f = GitFile(self._filename, "wb") try: f = SHA1Writer(f) write_index_dict(f, self._byname, version=self._version) finally: f.close() def read(self): """Read current contents of index from disk.""" if not os.path.exists(self._filename): return f = GitFile(self._filename, "rb") try: f = SHA1Reader(f) for name, entry in read_index(f): self[name] = entry # FIXME: Additional data? f.read(os.path.getsize(self._filename) - f.tell() - 20) f.check_sha() finally: f.close() def __len__(self) -> int: """Number of entries in this index file.""" return len(self._byname) def __getitem__(self, name: bytes) -> IndexEntry: """Retrieve entry by relative path. Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) """ return self._byname[name] def __iter__(self) -> Iterator[bytes]: """Iterate over the paths in this index.""" return iter(self._byname) def get_sha1(self, path: bytes) -> bytes: """Return the (git object) SHA1 for the object at a path.""" return self[path].sha def get_mode(self, path: bytes) -> int: """Return the POSIX file mode for the object at a path.""" return self[path].mode def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]: """Iterate over path, sha, mode tuples for use with commit_tree.""" for path in self: entry = self[path] yield path, entry.sha, cleanup_mode(entry.mode) def iterblobs(self): import warnings warnings.warn("Use iterobjects() instead.", PendingDeprecationWarning) return self.iterobjects() def clear(self): """Remove all contents from this index.""" self._byname = {} def __setitem__(self, name, x): assert isinstance(name, bytes) assert len(x) == len(IndexEntry._fields) # Remove the old entry if any self._byname[name] = IndexEntry(*x) def __delitem__(self, name): assert isinstance(name, bytes) del self._byname[name] def iteritems(self): return self._byname.items() def items(self): return self._byname.items() def update(self, entries): for name, value in entries.items(): self[name] = value def changes_from_tree(self, object_store, tree, want_unchanged=False): """Find the differences between the contents of this index and a tree. Args: object_store: Object store to use for retrieving tree contents tree: SHA1 of the root tree want_unchanged: Whether unchanged files should be reported Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) """ def lookup_entry(path): entry = self[path] return entry.sha, cleanup_mode(entry.mode) for (name, mode, sha) in changes_from_tree( self._byname.keys(), lookup_entry, object_store, tree, want_unchanged=want_unchanged, ): yield (name, mode, sha) def commit(self, object_store): """Create a new tree from an index. Args: object_store: Object store to save the tree in Returns: Root tree SHA """ return commit_tree(object_store, self.iterobjects()) def commit_tree( object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]] ) -> bytes: """Commit a new tree. Args: object_store: Object store to add trees to blobs: Iterable over blob path, sha, mode entries Returns: SHA1 of the created tree. 
""" trees = {b"": {}} # type: Dict[bytes, Any] def add_tree(path): if path in trees: return trees[path] dirname, basename = pathsplit(path) t = add_tree(dirname) assert isinstance(basename, bytes) newtree = {} t[basename] = newtree trees[path] = newtree return newtree for path, sha, mode in blobs: tree_path, basename = pathsplit(path) tree = add_tree(tree_path) tree[basename] = (mode, sha) def build_tree(path): tree = Tree() for basename, entry in trees[path].items(): if isinstance(entry, dict): mode = stat.S_IFDIR sha = build_tree(pathjoin(path, basename)) else: (mode, sha) = entry tree.add(basename, mode, sha) object_store.add_object(tree) return tree.id return build_tree(b"") def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes: """Create a new tree from an index. Args: object_store: Object store to save the tree in index: Index file Note: This function is deprecated, use index.commit() instead. Returns: Root tree sha. """ return commit_tree(object_store, index.iterobjects()) def changes_from_tree( names: Iterable[bytes], lookup_entry: Callable[[bytes], Tuple[bytes, int]], object_store: "BaseObjectStore", tree: Optional[bytes], want_unchanged=False, ) -> Iterable[ Tuple[ Tuple[Optional[bytes], Optional[bytes]], Tuple[Optional[int], Optional[int]], Tuple[Optional[bytes], Optional[bytes]], ] ]: """Find the differences between the contents of a tree and a working copy. Args: names: Iterable of names in the working copy lookup_entry: Function to lookup an entry in the working copy object_store: Object store to use for retrieving tree contents tree: SHA1 of the root tree, or None for an empty tree want_unchanged: Whether unchanged files should be reported Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) """ # TODO(jelmer): Support a include_trees option other_names = set(names) if tree is not None: for (name, mode, sha) in object_store.iter_tree_contents(tree): try: (other_sha, other_mode) = lookup_entry(name) except KeyError: # Was removed yield ((name, None), (mode, None), (sha, None)) else: other_names.remove(name) if want_unchanged or other_sha != sha or other_mode != mode: yield ((name, name), (mode, other_mode), (sha, other_sha)) # Mention added files for name in other_names: try: (other_sha, other_mode) = lookup_entry(name) except KeyError: pass else: yield ((None, name), (None, other_mode), (None, other_sha)) def index_entry_from_stat( stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None, extended_flags: Optional[int] =None ): """Create a new index entry from a stat value. Args: stat_val: POSIX stat_result instance hex_sha: Hex sha of the object flags: Index flags """ if mode is None: mode = cleanup_mode(stat_val.st_mode) return IndexEntry( stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev, stat_val.st_ino, mode, stat_val.st_uid, stat_val.st_gid, stat_val.st_size, hex_sha, flags, extended_flags ) def build_file_from_blob( blob, mode, target_path, honor_filemode=True, tree_encoding="utf-8" ): """Build a file or symlink on disk based on a Git object. Args: obj: The git object mode: File mode target_path: Path to write to honor_filemode: An optional flag to honor core.filemode setting in config file, default is core.filemode=True, change executable bit Returns: stat object for the file """ try: oldstat = os.lstat(target_path) except FileNotFoundError: oldstat = None contents = blob.as_raw_string() if stat.S_ISLNK(mode): # FIXME: This will fail on Windows. What should we do instead? 
if oldstat: os.unlink(target_path) if sys.platform == "win32": # os.readlink on Python3 on Windows requires a unicode string. contents = contents.decode(tree_encoding) target_path = target_path.decode(tree_encoding) os.symlink(contents, target_path) else: if oldstat is not None and oldstat.st_size == len(contents): with open(target_path, "rb") as f: if f.read() == contents: return oldstat with open(target_path, "wb") as f: # Write out file f.write(contents) if honor_filemode: os.chmod(target_path, mode) return os.lstat(target_path) INVALID_DOTNAMES = (b".git", b".", b"..", b"") def validate_path_element_default(element): return element.lower() not in INVALID_DOTNAMES def validate_path_element_ntfs(element): stripped = element.rstrip(b". ").lower() if stripped in INVALID_DOTNAMES: return False if stripped == b"git~1": return False return True def validate_path(path, element_validator=validate_path_element_default): """Default path validator that just checks for .git/.""" parts = path.split(b"/") for p in parts: if not element_validator(p): return False else: return True def build_index_from_tree( root_path, index_path, object_store, tree_id, honor_filemode=True, validate_path_element=validate_path_element_default, ): """Generate and materialize index from a tree Args: tree_id: Tree to materialize root_path: Target dir for materialized index files index_path: Target path for generated index object_store: Non-empty object store holding tree contents honor_filemode: An optional flag to honor core.filemode setting in config file, default is core.filemode=True, change executable bit validate_path_element: Function to validate path elements to check out; default just refuses .git and .. directories. Note: existing index is wiped and contents are not merged in a working dir. Suitable only for fresh clones. """ index = Index(index_path) if not isinstance(root_path, bytes): root_path = os.fsencode(root_path) for entry in object_store.iter_tree_contents(tree_id): if not validate_path(entry.path, validate_path_element): continue full_path = _tree_to_fs_path(root_path, entry.path) if not os.path.exists(os.path.dirname(full_path)): os.makedirs(os.path.dirname(full_path)) # TODO(jelmer): Merge new index into working tree if S_ISGITLINK(entry.mode): if not os.path.isdir(full_path): os.mkdir(full_path) st = os.lstat(full_path) # TODO(jelmer): record and return submodule paths else: obj = object_store[entry.sha] st = build_file_from_blob( obj, entry.mode, full_path, honor_filemode=honor_filemode ) # Add file to index if not honor_filemode or S_ISGITLINK(entry.mode): # we can not use tuple slicing to build a new tuple, # because on windows that will convert the times to # longs, which causes errors further along st_tuple = ( entry.mode, st.st_ino, st.st_dev, st.st_nlink, st.st_uid, st.st_gid, st.st_size, st.st_atime, st.st_mtime, st.st_ctime, ) st = st.__class__(st_tuple) index[entry.path] = index_entry_from_stat(st, entry.sha, 0) index.write() def blob_from_path_and_mode(fs_path, mode, tree_encoding="utf-8"): """Create a blob from a path and a stat object. Args: fs_path: Full file system path to file st: A stat object Returns: A `Blob` object """ assert isinstance(fs_path, bytes) blob = Blob() if stat.S_ISLNK(mode): if sys.platform == "win32": # os.readlink on Python3 on Windows requires a unicode string. 
fs_path = os.fsdecode(fs_path) blob.data = os.readlink(fs_path).encode(tree_encoding) else: blob.data = os.readlink(fs_path) else: with open(fs_path, "rb") as f: blob.data = f.read() return blob def blob_from_path_and_stat(fs_path, st, tree_encoding="utf-8"): """Create a blob from a path and a stat object. Args: fs_path: Full file system path to file st: A stat object Returns: A `Blob` object """ return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding) def read_submodule_head(path): """Read the head commit of a submodule. Args: path: path to the submodule Returns: HEAD sha, None if not a valid head/repository """ from dulwich.errors import NotGitRepository from dulwich.repo import Repo # Repo currently expects a "str", so decode if necessary. # TODO(jelmer): Perhaps move this into Repo() ? if not isinstance(path, str): path = os.fsdecode(path) try: repo = Repo(path) except NotGitRepository: return None try: return repo.head() except KeyError: return None def _has_directory_changed(tree_path, entry): """Check if a directory has changed after getting an error. When handling an error trying to create a blob from a path, call this function. It will check if the path is a directory. If it's a directory and a submodule, check the submodule head to see if it's has changed. If not, consider the file as changed as Git tracked a file and not a directory. Return true if the given path should be considered as changed and False otherwise or if the path is not a directory. """ # This is actually a directory if os.path.exists(os.path.join(tree_path, b".git")): # Submodule head = read_submodule_head(tree_path) if entry.sha != head: return True else: # The file was changed to a directory, so consider it removed. return True return False -def get_unstaged_changes(index: Index, repo, filter_blob_callback=None): +def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None): """Walk through an index and check for differences against working tree. Args: index: index to check root_path: path in which to find files Returns: iterator over paths with unstaged changes """ - root_path = repo.path # For each entry in the index check the sha1 & ensure not staged if not isinstance(root_path, bytes): root_path = os.fsencode(root_path) for tree_path, entry in index.iteritems(): full_path = _tree_to_fs_path(root_path, tree_path) try: st = os.lstat(full_path) if stat.S_ISDIR(st.st_mode): if _has_directory_changed(tree_path, entry): yield tree_path continue if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode): continue blob = blob_from_path_and_stat(full_path, st) if filter_blob_callback is not None: - # Check if the file is already in the index try: index[tree_path] new_file = False except KeyError: new_file = True blob = filter_blob_callback(blob, tree_path, new_file) except FileNotFoundError: # The file was removed, so we assume that counts as # different from whatever file used to exist. yield tree_path else: if blob.id != entry.sha: yield tree_path os_sep_bytes = os.sep.encode("ascii") def _tree_to_fs_path(root_path, tree_path: bytes): """Convert a git tree path to a file system path. Args: root_path: Root filesystem path tree_path: Git tree path as bytes Returns: File system path. """ assert isinstance(tree_path, bytes) if os_sep_bytes != b"/": sep_corrected_path = tree_path.replace(b"/", os_sep_bytes) else: sep_corrected_path = tree_path return os.path.join(root_path, sep_corrected_path) def _fs_to_tree_path(fs_path): """Convert a file system path to a git tree path. 
Args: fs_path: File system path. Returns: Git tree path as bytes """ if not isinstance(fs_path, bytes): fs_path_bytes = os.fsencode(fs_path) else: fs_path_bytes = fs_path if os_sep_bytes != b"/": tree_path = fs_path_bytes.replace(os_sep_bytes, b"/") else: tree_path = fs_path_bytes return tree_path def index_entry_from_path(path, object_store=None): """Create an index from a filesystem path. This returns an index value for files, symlinks and tree references. for directories and non-existant files it returns None Args: path: Path to create an index entry for object_store: Optional object store to save new blobs in Returns: An index entry; None for directories """ assert isinstance(path, bytes) st = os.lstat(path) if stat.S_ISDIR(st.st_mode): if os.path.exists(os.path.join(path, b".git")): head = read_submodule_head(path) if head is None: return None return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK) return None if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode): blob = blob_from_path_and_stat(path, st) if object_store is not None: object_store.add_object(blob) return index_entry_from_stat(st, blob.id, 0) return None def iter_fresh_entries( paths, root_path, object_store: Optional["BaseObjectStore"] = None ): """Iterate over current versions of index entries on disk. Args: paths: Paths to iterate over root_path: Root path to access from store: Optional store to save new blobs in Returns: Iterator over path, index_entry """ for path in paths: p = _tree_to_fs_path(root_path, path) try: entry = index_entry_from_path(p, object_store=object_store) except (FileNotFoundError, IsADirectoryError): entry = None yield path, entry def iter_fresh_blobs(index, root_path): """Iterate over versions of blobs on disk referenced by index. Don't use this function; it removes missing entries from index. Args: index: Index file root_path: Root path to access from include_deleted: Include deleted entries with sha and mode set to None Returns: Iterator over path, sha, mode """ import warnings warnings.warn(PendingDeprecationWarning, "Use iter_fresh_objects instead.") for entry in iter_fresh_objects(index, root_path, include_deleted=True): if entry[1] is None: del index[entry[0]] else: yield entry def iter_fresh_objects(paths, root_path, include_deleted=False, object_store=None): """Iterate over versions of objecs on disk referenced by index. Args: root_path: Root path to access from include_deleted: Include deleted entries with sha and mode set to None object_store: Optional object store to report new items to Returns: Iterator over path, sha, mode """ for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store): if entry is None: if include_deleted: yield path, None, None else: entry = IndexEntry(*entry) yield path, entry.sha, cleanup_mode(entry.mode) def refresh_index(index, root_path): """Refresh the contents of an index. This is the equivalent to running 'git commit -a'. Args: index: Index to update root_path: Root filesystem path """ for path, entry in iter_fresh_entries(index, root_path): index[path] = path diff --git a/dulwich/line_ending.py b/dulwich/line_ending.py index 6f149a20..690beada 100644 --- a/dulwich/line_ending.py +++ b/dulwich/line_ending.py @@ -1,293 +1,288 @@ # line_ending.py -- Line ending conversion functions # Copyright (C) 2018-2018 Boris Feld # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. 
You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """ All line-ending related functions, from conversions to config processing Line-ending normalization is a complex beast. Here is some notes and details about how it seems to work. The normalization is a two-fold process that happens at two moments: - When reading a file from the index and to the working directory. For example when doing a `git clone` or `git checkout` call. We call this process the read filter in this module. - When writing a file to the index from the working directory. For example when doing a `git add` call. We call this process the write filter in this module. -The normalization only happens when the resulting file does not exists yet. -For the write filter, they are files that are shown as added in status. +Note that when checking status (getting unstaged changes), whether or not +normalization is done on write depends on whether or not the file in the +working dir has also been normalized on read: + +- For autocrlf=true all files are always normalized on both read and write. +- For autocrlf=input files are only normalized once - whenever a new file is + added to the index. Since files which already exist in the index are + unmodified on read, they are also left unmodified upon subsequent writes. One thing to know is that Git does line-ending normalization only on text files. How does Git know that a file is text? We can either mark a file as a text file, a binary file or ask Git to automatically decides. Git has an heuristic to detect if a file is a text file or a binary file. It seems based on the percentage of non-printable characters in files. The code for this heuristic is here: https://git.kernel.org/pub/scm/git/git.git/tree/convert.c#n46 Dulwich have an implementation with a slightly different heuristic, the `is_binary` function in `dulwich.patch`. The binary detection heuristic implementation is close to the one in JGit: https://github.com/eclipse/jgit/blob/f6873ffe522bbc3536969a3a3546bf9a819b92bf/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java#L300 There is multiple variables that impact the normalization. First, a repository can contains a `.gitattributes` file (or more than one...) that can further customize the operation on some file patterns, for example: *.txt text Force all `.txt` files to be treated as text files and to have their lines endings normalized. *.jpg -text Force all `.jpg` files to be treated as binary files and to not have their lines endings converted. *.vcproj text eol=crlf Force all `.vcproj` files to be treated as text files and to have their lines endings converted into `CRLF` in working directory no matter the native EOL of the platform. *.sh text eol=lf Force all `.sh` files to be treated as text files and to have their lines endings converted into `LF` in working directory no matter the native EOL of the platform. If the `eol` attribute is not defined, Git uses the `core.eol` configuration value described later. 
* text=auto Force all files to be scanned by the text file heuristic detection and to have their line endings normalized in case they are detected as text files. Git also has an obsolete attribute named `crlf` that can be translated to the corresponding text attribute value. Then there are some configuration options (that can be defined at the repository or user level): - core.autocrlf - core.eol `core.autocrlf` is taken into account for all files that don't have a `text` attribute defined in `.gitattributes`; it takes three possible values: - `true`: This forces all files in the working directory to have CRLF line-endings and converts line-endings to LF when writing to the index. When autocrlf is set to true, the eol value is ignored. - `input`: Quite similar to the `true` value but it only forces the write filter, i.e. line-endings of new files added to the index will be converted to LF. - `false` (default): No normalization is done. `core.eol` is the top-level configuration to define the line-ending to use when applying the read filter. It takes three possible values: - `lf`: When normalization is done, force line-endings to be `LF` in the working directory. - `crlf`: When normalization is done, force line-endings to be `CRLF` in the working directory. - `native` (default): When normalization is done, force line-endings to be the platform's native line ending. One thing to remember is that when line-ending normalization is done on a file, Git always normalizes line-endings to `LF` when writing to the index. There are sources that seem to indicate that Git won't do line-ending normalization when a file contains mixed line-endings. I think this logic might be in the text / binary detection heuristic, but I couldn't find it yet.
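To make the autocrlf behaviour described above concrete, here is a minimal sketch (an editorial illustration, not part of the patch) that exercises the filter helpers defined further down in this module; it assumes only the public functions of dulwich.line_ending that appear in this diff:

    from dulwich.line_ending import (
        convert_crlf_to_lf,
        convert_lf_to_crlf,
        get_checkin_filter_autocrlf,
        get_checkout_filter_autocrlf,
    )

    # autocrlf=true: checkin (write) normalizes CRLF -> LF,
    # checkout (read) expands LF -> CRLF.
    assert get_checkin_filter_autocrlf(b"true") is convert_crlf_to_lf
    assert get_checkout_filter_autocrlf(b"true") is convert_lf_to_crlf
    assert convert_crlf_to_lf(b"a\r\nb\r\n") == b"a\nb\n"
    assert convert_lf_to_crlf(b"a\nb\n") == b"a\r\nb\r\n"

    # autocrlf=input: only the checkin (write) filter is active.
    assert get_checkin_filter_autocrlf(b"input") is convert_crlf_to_lf
    assert get_checkout_filter_autocrlf(b"input") is None

    # autocrlf=false (the default): no conversion in either direction.
    assert get_checkin_filter_autocrlf(b"false") is None
    assert get_checkout_filter_autocrlf(b"false") is None

Note that for autocrlf=input only the write side converts, which is why the revised checkin_normalize below also checks whether a read filter was active before normalizing files that already exist in the index.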
Sources: - https://git-scm.com/docs/git-config#git-config-coreeol - https://git-scm.com/docs/git-config#git-config-coreautocrlf - https://git-scm.com/docs/gitattributes#_checking_out_and_checking_in - https://adaptivepatchwork.com/2012/03/01/mind-the-end-of-your-line/ """ from dulwich.objects import Blob from dulwich.patch import is_binary CRLF = b"\r\n" LF = b"\n" def convert_crlf_to_lf(text_hunk): """Convert CRLF in text hunk into LF Args: text_hunk: A bytes string representing a text hunk Returns: The text hunk with the same type, with CRLF replaced into LF """ return text_hunk.replace(CRLF, LF) def convert_lf_to_crlf(text_hunk): """Convert LF in text hunk into CRLF Args: text_hunk: A bytes string representing a text hunk Returns: The text hunk with the same type, with LF replaced into CRLF """ # TODO find a more efficient way of doing it intermediary = text_hunk.replace(CRLF, LF) return intermediary.replace(LF, CRLF) def get_checkout_filter(core_eol, core_autocrlf, git_attributes): """Returns the correct checkout filter based on the passed arguments""" # TODO this function should process the git_attributes for the path and if # the text attribute is not defined, fallback on the # get_checkout_filter_autocrlf function with the autocrlf value return get_checkout_filter_autocrlf(core_autocrlf) def get_checkin_filter(core_eol, core_autocrlf, git_attributes): """Returns the correct checkin filter based on the passed arguments""" # TODO this function should process the git_attributes for the path and if # the text attribute is not defined, fallback on the # get_checkin_filter_autocrlf function with the autocrlf value return get_checkin_filter_autocrlf(core_autocrlf) def get_checkout_filter_autocrlf(core_autocrlf): """Returns the correct checkout filter base on autocrlf value Args: core_autocrlf: The bytes configuration value of core.autocrlf. Valid values are: b'true', b'false' or b'input'. Returns: Either None if no filter has to be applied or a function accepting a single argument, a binary text hunk """ if core_autocrlf == b"true": return convert_lf_to_crlf return None def get_checkin_filter_autocrlf(core_autocrlf): """Returns the correct checkin filter base on autocrlf value Args: core_autocrlf: The bytes configuration value of core.autocrlf. Valid values are: b'true', b'false' or b'input'. 
Returns: Either None if no filter has to be applied or a function accepting a single argument, a binary text hunk """ if core_autocrlf == b"true" or core_autocrlf == b"input": return convert_crlf_to_lf # Checking filter should never be `convert_lf_to_crlf` return None class BlobNormalizer(object): """An object to store computation result of which filter to apply based on configuration, gitattributes, path and operation (checkin or checkout) """ def __init__(self, config_stack, gitattributes): self.config_stack = config_stack self.gitattributes = gitattributes # Compute which filters we needs based on parameters try: core_eol = config_stack.get("core", "eol") except KeyError: core_eol = "native" try: core_autocrlf = config_stack.get("core", "autocrlf").lower() except KeyError: core_autocrlf = False self.fallback_read_filter = get_checkout_filter( core_eol, core_autocrlf, self.gitattributes ) self.fallback_write_filter = get_checkin_filter( core_eol, core_autocrlf, self.gitattributes ) - def checkin_normalize(self, blob, tree_path, new_file=True): - """ Normalize a blob during a checkin operation - - new_file is set to True by default for backward-compatibility - """ - if not new_file: - # Line-ending normalization only happens for new files, aka files - # not already commited - return blob - - if self.fallback_write_filter is not None: + def checkin_normalize(self, blob, tree_path, new_file=False): + """Normalize a blob during a checkin operation""" + # Existing files should only be normalized on checkin if it was + # previously normalized on checkout + if ( + self.fallback_write_filter is not None + and (self.fallback_read_filter is not None or new_file) + ): return normalize_blob( blob, self.fallback_write_filter, binary_detection=True ) return blob - def checkout_normalize(self, blob, tree_path, new_file=True): - """ Normalize a blob during a checkout operation - - new_file is set to True by default for backward-compatibility - """ - if not new_file: - # Line-ending normalization only happens for new files, aka files - # not already commited - return blob - + def checkout_normalize(self, blob, tree_path): + """Normalize a blob during a checkout operation""" if self.fallback_read_filter is not None: return normalize_blob( blob, self.fallback_read_filter, binary_detection=True ) return blob def normalize_blob(blob, conversion, binary_detection): """Takes a blob as input returns either the original blob if binary_detection is True and the blob content looks like binary, else return a new blob with converted data """ # Read the original blob data = blob.data # If we need to detect if a file is binary and the file is detected as # binary, do not apply the conversion function and return the original # chunked text if binary_detection is True: if is_binary(data): return blob # Now apply the conversion converted_data = conversion(data) new_blob = Blob() new_blob.data = converted_data return new_blob diff --git a/dulwich/porcelain.py b/dulwich/porcelain.py index 0209c88c..ea1722ed 100644 --- a/dulwich/porcelain.py +++ b/dulwich/porcelain.py @@ -1,1924 +1,1924 @@ # porcelain.py -- Porcelain-like layer on top of Dulwich # Copyright (C) 2013 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. 
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Simple wrapper that provides porcelain-like functions on top of Dulwich. Currently implemented: * archive * add * branch{_create,_delete,_list} * check-ignore * checkout * clone * commit * commit-tree * daemon * describe * diff-tree * fetch * init * ls-files * ls-remote * ls-tree * pull * push * rm * remote{_add} * receive-pack * reset * rev-list * tag{_create,_delete,_list} * upload-pack * update-server-info * status * symbolic-ref These functions are meant to behave similarly to the git subcommands. Differences in behaviour are considered bugs. Functions should generally accept both unicode strings and bytestrings """ from collections import namedtuple from contextlib import ( closing, contextmanager, ) from io import BytesIO, RawIOBase import datetime import os from pathlib import Path import posixpath import shutil import stat import sys import time from typing import ( Dict, Optional, Tuple, Union, ) from dulwich.archive import ( tar_stream, ) from dulwich.client import ( get_transport_and_path, ) from dulwich.config import ( StackedConfig, ) from dulwich.diff_tree import ( CHANGE_ADD, CHANGE_DELETE, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY, RENAME_CHANGE_TYPES, ) from dulwich.errors import ( SendPackError, ) from dulwich.graph import ( can_fast_forward, ) from dulwich.ignore import IgnoreFilterManager from dulwich.index import ( blob_from_path_and_stat, get_unstaged_changes, ) from dulwich.object_store import ( tree_lookup_path, ) from dulwich.objects import ( Commit, Tag, format_timezone, parse_timezone, pretty_format_tree_entry, ) from dulwich.objectspec import ( parse_commit, parse_object, parse_ref, parse_reftuples, parse_tree, ) from dulwich.pack import ( write_pack_index, write_pack_objects, ) from dulwich.patch import write_tree_diff from dulwich.protocol import ( Protocol, ZERO_SHA, ) from dulwich.refs import ( ANNOTATED_TAG_SUFFIX, LOCAL_BRANCH_PREFIX, strip_peeled_refs, RefsContainer, ) from dulwich.repo import BaseRepo, Repo from dulwich.server import ( FileSystemBackend, TCPGitServer, ReceivePackHandler, UploadPackHandler, update_server_info as server_update_server_info, ) # Module level tuple definition for status output GitStatus = namedtuple("GitStatus", "staged unstaged untracked") class NoneStream(RawIOBase): """Fallback if stdout or stderr are unavailable, does nothing.""" def read(self, size=-1): return None def readall(self): return None def readinto(self, b): return None def write(self, b): return None default_bytes_out_stream = getattr(sys.stdout, "buffer", None) or NoneStream() default_bytes_err_stream = getattr(sys.stderr, "buffer", None) or NoneStream() DEFAULT_ENCODING = "utf-8" class Error(Exception): """Porcelain-based error. 
""" def __init__(self, msg, inner=None): super(Error, self).__init__(msg) self.inner = inner class RemoteExists(Error): """Raised when the remote already exists.""" def open_repo(path_or_repo): """Open an argument that can be a repository or a path for a repository.""" if isinstance(path_or_repo, BaseRepo): return path_or_repo return Repo(path_or_repo) @contextmanager def _noop_context_manager(obj): """Context manager that has the same api as closing but does nothing.""" yield obj def open_repo_closing(path_or_repo): """Open an argument that can be a repository or a path for a repository. returns a context manager that will close the repo on exit if the argument is a path, else does nothing if the argument is a repo. """ if isinstance(path_or_repo, BaseRepo): return _noop_context_manager(path_or_repo) return closing(Repo(path_or_repo)) def path_to_tree_path(repopath, path, tree_encoding=DEFAULT_ENCODING): """Convert a path to a path usable in an index, e.g. bytes and relative to the repository root. Args: repopath: Repository path, absolute or relative to the cwd path: A path, absolute or relative to the cwd Returns: A path formatted for use in e.g. an index """ # Pathlib resolve before Python 3.6 could raises FileNotFoundError in case # there is no file matching the path so we reuse the old implementation for # Python 3.5 if sys.version_info < (3, 6): if not isinstance(path, bytes): path = os.fsencode(path) if not isinstance(repopath, bytes): repopath = os.fsencode(repopath) treepath = os.path.relpath(path, repopath) if treepath.startswith(b".."): err_msg = "Path %r not in repo path (%r)" % (path, repopath) raise ValueError(err_msg) if os.path.sep != "/": treepath = treepath.replace(os.path.sep.encode("ascii"), b"/") return treepath else: # Resolve might returns a relative path on Windows # https://bugs.python.org/issue38671 if sys.platform == "win32": path = os.path.abspath(path) path = Path(path) resolved_path = path.resolve() # Resolve and abspath seems to behave differently regarding symlinks, # as we are doing abspath on the file path, we need to do the same on # the repo path or they might not match if sys.platform == "win32": repopath = os.path.abspath(repopath) repopath = Path(repopath).resolve() try: relpath = resolved_path.relative_to(repopath) except ValueError: # If path is a symlink that points to a file outside the repo, we # want the relpath for the link itself, not the resolved target if path.is_symlink(): parent = path.parent.resolve() relpath = (parent / path.name).relative_to(repopath) else: raise if sys.platform == "win32": return str(relpath).replace(os.path.sep, "/").encode(tree_encoding) else: return bytes(relpath) class DivergedBranches(Error): """Branches have diverged and fast-forward is not possible.""" def check_diverged(repo, current_sha, new_sha): """Check if updating to a sha can be done with fast forwarding. Args: repo: Repository object current_sha: Current head sha new_sha: New head sha """ try: can = can_fast_forward(repo, current_sha, new_sha) except KeyError: can = False if not can: raise DivergedBranches(current_sha, new_sha) def archive( repo, committish=None, outstream=default_bytes_out_stream, errstream=default_bytes_err_stream, ): """Create an archive. Args: repo: Path of repository for which to generate an archive. 
committish: Commit SHA1 or ref to use outstream: Output stream (defaults to stdout) errstream: Error stream (defaults to stderr) """ if committish is None: committish = "HEAD" with open_repo_closing(repo) as repo_obj: c = parse_commit(repo_obj, committish) for chunk in tar_stream( repo_obj.object_store, repo_obj.object_store[c.tree], c.commit_time ): outstream.write(chunk) def update_server_info(repo="."): """Update server info files for a repository. Args: repo: path to the repository """ with open_repo_closing(repo) as r: server_update_server_info(r) def symbolic_ref(repo, ref_name, force=False): """Set git symbolic ref into HEAD. Args: repo: path to the repository ref_name: short name of the new ref force: force settings without checking if it exists in refs/heads """ with open_repo_closing(repo) as repo_obj: ref_path = _make_branch_ref(ref_name) if not force and ref_path not in repo_obj.refs.keys(): raise Error("fatal: ref `%s` is not a ref" % ref_name) repo_obj.refs.set_symbolic_ref(b"HEAD", ref_path) def commit( repo=".", message=None, author=None, committer=None, encoding=None, no_verify=False, ): """Create a new commit. Args: repo: Path to repository message: Optional commit message author: Optional author name and email committer: Optional committer name and email no_verify: Skip pre-commit and commit-msg hooks Returns: SHA1 of the new commit """ # FIXME: Support --all argument # FIXME: Support --signoff argument if getattr(message, "encode", None): message = message.encode(encoding or DEFAULT_ENCODING) if getattr(author, "encode", None): author = author.encode(encoding or DEFAULT_ENCODING) if getattr(committer, "encode", None): committer = committer.encode(encoding or DEFAULT_ENCODING) with open_repo_closing(repo) as r: return r.do_commit( message=message, author=author, committer=committer, encoding=encoding, no_verify=no_verify, ) def commit_tree(repo, tree, message=None, author=None, committer=None): """Create a new commit object. Args: repo: Path to repository tree: An existing tree object author: Optional author name and email committer: Optional committer name and email """ with open_repo_closing(repo) as r: return r.do_commit( message=message, tree=tree, committer=committer, author=author ) def init(path=".", bare=False): """Create a new git repository. Args: path: Path to repository. bare: Whether to create a bare repository. Returns: A Repo instance """ if not os.path.exists(path): os.mkdir(path) if bare: return Repo.init_bare(path) else: return Repo.init(path) def clone( source, target=None, bare=False, checkout=None, errstream=default_bytes_err_stream, outstream=None, origin=b"origin", depth=None, **kwargs ): """Clone a local or remote git repository. 
Args: source: Path or URL for source repository target: Path to target repository (optional) bare: Whether or not to create a bare repository checkout: Whether or not to check-out HEAD after cloning errstream: Optional stream to write progress to outstream: Optional stream to write progress to (deprecated) origin: Name of remote from the repository used to clone depth: Depth to fetch at Returns: The new repository """ # TODO(jelmer): This code overlaps quite a bit with Repo.clone if outstream is not None: import warnings warnings.warn( "outstream= has been deprecated in favour of errstream=.", DeprecationWarning, stacklevel=3, ) errstream = outstream if checkout is None: checkout = not bare if checkout and bare: raise Error("checkout and bare are incompatible") if target is None: target = source.split("/")[-1] if not os.path.exists(target): os.mkdir(target) if bare: r = Repo.init_bare(target) else: r = Repo.init(target) reflog_message = b"clone: from " + source.encode("utf-8") try: target_config = r.get_config() if not isinstance(source, bytes): source = source.encode(DEFAULT_ENCODING) target_config.set((b"remote", origin), b"url", source) target_config.set( (b"remote", origin), b"fetch", b"+refs/heads/*:refs/remotes/" + origin + b"/*", ) target_config.write_to_path() fetch_result = fetch( r, origin, errstream=errstream, message=reflog_message, depth=depth, **kwargs ) for key, target in fetch_result.symrefs.items(): r.refs.set_symbolic_ref(key, target) try: head = r[fetch_result.refs[b"HEAD"]] except KeyError: head = None else: r[b"HEAD"] = head.id if checkout and not bare and head is not None: errstream.write(b"Checking out " + head.id + b"\n") r.reset_index(head.tree) except BaseException: shutil.rmtree(target) r.close() raise return r def add(repo=".", paths=None): """Add files to the staging area. Args: repo: Repository for the files paths: Paths to add. No value passed stages all modified files. Returns: Tuple with set of added files and ignored files If the repository contains ignored directories, the returned set will contain the path to an ignored directory (with trailing slash). Individual files within ignored directories will not be returned. """ ignored = set() with open_repo_closing(repo) as r: repo_path = Path(r.path).resolve() ignore_manager = IgnoreFilterManager.from_repo(r) if not paths: paths = list( get_untracked_paths( str(Path(os.getcwd()).resolve()), str(repo_path), r.open_index(), ) ) relpaths = [] if not isinstance(paths, list): paths = [paths] for p in paths: path = Path(p) relpath = str(path.resolve().relative_to(repo_path)) # FIXME: Support patterns if path.is_dir(): relpath = os.path.join(relpath, "") if ignore_manager.is_ignored(relpath): ignored.add(relpath) continue relpaths.append(relpath) r.stage(relpaths) return (relpaths, ignored) def _is_subdir(subdir, parentdir): """Check whether subdir is parentdir or a subdir of parentdir If parentdir or subdir is a relative path, it will be disamgibuated relative to the pwd. """ parentdir_abs = os.path.realpath(parentdir) + os.path.sep subdir_abs = os.path.realpath(subdir) + os.path.sep return subdir_abs.startswith(parentdir_abs) # TODO: option to remove ignored files also, in line with `git clean -fdx` def clean(repo=".", target_dir=None): """Remove any untracked files from the target directory recursively Equivalent to running `git clean -fd` in target_dir. 
Args: repo: Repository where the files may be tracked target_dir: Directory to clean - current directory if None """ if target_dir is None: target_dir = os.getcwd() with open_repo_closing(repo) as r: if not _is_subdir(target_dir, r.path): raise Error("target_dir must be in the repo's working dir") config = r.get_config_stack() require_force = config.get_boolean( # noqa: F841 (b"clean",), b"requireForce", True ) # TODO(jelmer): if require_force is set, then make sure that -f, -i or # -n is specified. index = r.open_index() ignore_manager = IgnoreFilterManager.from_repo(r) paths_in_wd = _walk_working_dir_paths(target_dir, r.path) # Reverse file visit order, so that files and subdirectories are # removed before containing directory for ap, is_dir in reversed(list(paths_in_wd)): if is_dir: # All subdirectories and files have been removed if untracked, # so dir contains no tracked files iff it is empty. is_empty = len(os.listdir(ap)) == 0 if is_empty: os.rmdir(ap) else: ip = path_to_tree_path(r.path, ap) is_tracked = ip in index rp = os.path.relpath(ap, r.path) is_ignored = ignore_manager.is_ignored(rp) if not is_tracked and not is_ignored: os.remove(ap) def remove(repo=".", paths=None, cached=False): """Remove files from the staging area. Args: repo: Repository for the files paths: Paths to remove """ with open_repo_closing(repo) as r: index = r.open_index() for p in paths: full_path = os.fsencode(os.path.abspath(p)) tree_path = path_to_tree_path(r.path, p) try: index_sha = index[tree_path].sha except KeyError: raise Error("%s did not match any files" % p) if not cached: try: st = os.lstat(full_path) except OSError: pass else: try: blob = blob_from_path_and_stat(full_path, st) except IOError: pass else: try: committed_sha = tree_lookup_path( r.__getitem__, r[r.head()].tree, tree_path )[1] except KeyError: committed_sha = None if blob.id != index_sha and index_sha != committed_sha: raise Error( "file has staged content differing " "from both the file and head: %s" % p ) if index_sha != committed_sha: raise Error("file has staged changes: %s" % p) os.remove(full_path) del index[tree_path] index.write() rm = remove def commit_decode(commit, contents, default_encoding=DEFAULT_ENCODING): if commit.encoding: encoding = commit.encoding.decode("ascii") else: encoding = default_encoding return contents.decode(encoding, "replace") def commit_encode(commit, contents, default_encoding=DEFAULT_ENCODING): if commit.encoding: encoding = commit.encoding.decode("ascii") else: encoding = default_encoding return contents.encode(encoding) def print_commit(commit, decode, outstream=sys.stdout): """Write a human-readable commit log entry. 
Args: commit: A `Commit` object outstream: A stream file to write to """ outstream.write("-" * 50 + "\n") outstream.write("commit: " + commit.id.decode("ascii") + "\n") if len(commit.parents) > 1: outstream.write( "merge: " + "...".join([c.decode("ascii") for c in commit.parents[1:]]) + "\n" ) outstream.write("Author: " + decode(commit.author) + "\n") if commit.author != commit.committer: outstream.write("Committer: " + decode(commit.committer) + "\n") time_tuple = time.gmtime(commit.author_time + commit.author_timezone) time_str = time.strftime("%a %b %d %Y %H:%M:%S", time_tuple) timezone_str = format_timezone(commit.author_timezone).decode("ascii") outstream.write("Date: " + time_str + " " + timezone_str + "\n") outstream.write("\n") outstream.write(decode(commit.message) + "\n") outstream.write("\n") def print_tag(tag, decode, outstream=sys.stdout): """Write a human-readable tag. Args: tag: A `Tag` object decode: Function for decoding bytes to unicode string outstream: A stream to write to """ outstream.write("Tagger: " + decode(tag.tagger) + "\n") time_tuple = time.gmtime(tag.tag_time + tag.tag_timezone) time_str = time.strftime("%a %b %d %Y %H:%M:%S", time_tuple) timezone_str = format_timezone(tag.tag_timezone).decode("ascii") outstream.write("Date: " + time_str + " " + timezone_str + "\n") outstream.write("\n") outstream.write(decode(tag.message)) outstream.write("\n") def show_blob(repo, blob, decode, outstream=sys.stdout): """Write a blob to a stream. Args: repo: A `Repo` object blob: A `Blob` object decode: Function for decoding bytes to unicode string outstream: A stream file to write to """ outstream.write(decode(blob.data)) def show_commit(repo, commit, decode, outstream=sys.stdout): """Show a commit to a stream. Args: repo: A `Repo` object commit: A `Commit` object decode: Function for decoding bytes to unicode string outstream: Stream to write to """ print_commit(commit, decode=decode, outstream=outstream) if commit.parents: parent_commit = repo[commit.parents[0]] base_tree = parent_commit.tree else: base_tree = None diffstream = BytesIO() write_tree_diff(diffstream, repo.object_store, base_tree, commit.tree) diffstream.seek(0) outstream.write(commit_decode(commit, diffstream.getvalue())) def show_tree(repo, tree, decode, outstream=sys.stdout): """Print a tree to a stream. Args: repo: A `Repo` object tree: A `Tree` object decode: Function for decoding bytes to unicode string outstream: Stream to write to """ for n in tree: outstream.write(decode(n) + "\n") def show_tag(repo, tag, decode, outstream=sys.stdout): """Print a tag to a stream. 
Args: repo: A `Repo` object tag: A `Tag` object decode: Function for decoding bytes to unicode string outstream: Stream to write to """ print_tag(tag, decode, outstream) show_object(repo, repo[tag.object[1]], decode, outstream) def show_object(repo, obj, decode, outstream): return { b"tree": show_tree, b"blob": show_blob, b"commit": show_commit, b"tag": show_tag, }[obj.type_name](repo, obj, decode, outstream) def print_name_status(changes): """Print a simple status summary, listing changed files.""" for change in changes: if not change: continue if isinstance(change, list): change = change[0] if change.type == CHANGE_ADD: path1 = change.new.path path2 = "" kind = "A" elif change.type == CHANGE_DELETE: path1 = change.old.path path2 = "" kind = "D" elif change.type == CHANGE_MODIFY: path1 = change.new.path path2 = "" kind = "M" elif change.type in RENAME_CHANGE_TYPES: path1 = change.old.path path2 = change.new.path if change.type == CHANGE_RENAME: kind = "R" elif change.type == CHANGE_COPY: kind = "C" yield "%-8s%-20s%-20s" % (kind, path1, path2) def log( repo=".", paths=None, outstream=sys.stdout, max_entries=None, reverse=False, name_status=False, ): """Write commit logs. Args: repo: Path to repository paths: Optional set of specific paths to print entries for outstream: Stream to write log output to reverse: Reverse order in which entries are printed name_status: Print name status max_entries: Optional maximum number of entries to display """ with open_repo_closing(repo) as r: walker = r.get_walker(max_entries=max_entries, paths=paths, reverse=reverse) for entry in walker: def decode(x): return commit_decode(entry.commit, x) print_commit(entry.commit, decode, outstream) if name_status: outstream.writelines( [line + "\n" for line in print_name_status(entry.changes())] ) # TODO(jelmer): better default for encoding? def show( repo=".", objects=None, outstream=sys.stdout, default_encoding=DEFAULT_ENCODING, ): """Print the changes in a commit. Args: repo: Path to repository objects: Objects to show (defaults to [HEAD]) outstream: Stream to write to default_encoding: Default encoding to use if none is set in the commit """ if objects is None: objects = ["HEAD"] if not isinstance(objects, list): objects = [objects] with open_repo_closing(repo) as r: for objectish in objects: o = parse_object(r, objectish) if isinstance(o, Commit): def decode(x): return commit_decode(o, x, default_encoding) else: def decode(x): return x.decode(default_encoding) show_object(r, o, decode, outstream) def diff_tree(repo, old_tree, new_tree, outstream=sys.stdout): """Compares the content and mode of blobs found via two tree objects. Args: repo: Path to repository old_tree: Id of old tree new_tree: Id of new tree outstream: Stream to write to """ with open_repo_closing(repo) as r: write_tree_diff(outstream, r.object_store, old_tree, new_tree) def rev_list(repo, commits, outstream=sys.stdout): """Lists commit objects in reverse chronological order. 
Args: repo: Path to repository commits: Commits over which to iterate outstream: Stream to write to """ with open_repo_closing(repo) as r: for entry in r.get_walker(include=[r[c].id for c in commits]): outstream.write(entry.commit.id + b"\n") def tag(*args, **kwargs): import warnings warnings.warn( "tag has been deprecated in favour of tag_create.", DeprecationWarning ) return tag_create(*args, **kwargs) def tag_create( repo, tag, author=None, message=None, annotated=False, objectish="HEAD", tag_time=None, tag_timezone=None, sign=False, ): """Creates a tag in git via dulwich calls: Args: repo: Path to repository tag: tag string author: tag author (optional, if annotated is set) message: tag message (optional) annotated: whether to create an annotated tag objectish: object the tag should point at, defaults to HEAD tag_time: Optional time for annotated tag tag_timezone: Optional timezone for annotated tag sign: GPG Sign the tag (bool, defaults to False, pass True to use default GPG key, pass a str containing Key ID to use a specific GPG key) """ with open_repo_closing(repo) as r: object = parse_object(r, objectish) if annotated: # Create the tag object tag_obj = Tag() if author is None: # TODO(jelmer): Don't use repo private method. author = r._get_user_identity(r.get_config_stack()) tag_obj.tagger = author tag_obj.message = message + "\n".encode() tag_obj.name = tag tag_obj.object = (type(object), object.id) if tag_time is None: tag_time = int(time.time()) tag_obj.tag_time = tag_time if tag_timezone is None: # TODO(jelmer) Use current user timezone rather than UTC tag_timezone = 0 elif isinstance(tag_timezone, str): tag_timezone = parse_timezone(tag_timezone) tag_obj.tag_timezone = tag_timezone if sign: tag_obj.sign(sign if isinstance(sign, str) else None) r.object_store.add_object(tag_obj) tag_id = tag_obj.id else: tag_id = object.id r.refs[_make_tag_ref(tag)] = tag_id def list_tags(*args, **kwargs): import warnings warnings.warn( "list_tags has been deprecated in favour of tag_list.", DeprecationWarning, ) return tag_list(*args, **kwargs) def tag_list(repo, outstream=sys.stdout): """List all tags. Args: repo: Path to repository outstream: Stream to write tags to """ with open_repo_closing(repo) as r: tags = sorted(r.refs.as_dict(b"refs/tags")) return tags def tag_delete(repo, name): """Remove a tag. Args: repo: Path to repository name: Name of tag to remove """ with open_repo_closing(repo) as r: if isinstance(name, bytes): names = [name] elif isinstance(name, list): names = name else: raise Error("Unexpected tag name type %r" % name) for name in names: del r.refs[_make_tag_ref(name)] def reset(repo, mode, treeish="HEAD"): """Reset current HEAD to the specified state. 
Args: repo: Path to repository mode: Mode ("hard", "soft", "mixed") treeish: Treeish to reset to """ if mode != "hard": raise Error("hard is the only mode currently supported") with open_repo_closing(repo) as r: tree = parse_tree(r, treeish) r.reset_index(tree.id) def get_remote_repo( repo: Repo, remote_location: Optional[Union[str, bytes]] = None ) -> Tuple[Optional[str], str]: config = repo.get_config() if remote_location is None: remote_location = get_branch_remote(repo) if isinstance(remote_location, str): encoded_location = remote_location.encode() else: encoded_location = remote_location section = (b"remote", encoded_location) remote_name = None # type: Optional[str] if config.has_section(section): remote_name = encoded_location.decode() url = config.get(section, "url") encoded_location = url else: remote_name = None return (remote_name, encoded_location.decode()) def push( repo, remote_location=None, refspecs=None, outstream=default_bytes_out_stream, errstream=default_bytes_err_stream, force=False, **kwargs ): """Remote push with dulwich via dulwich.client Args: repo: Path to repository remote_location: Location of the remote refspecs: Refs to push to remote outstream: A stream file to write output errstream: A stream file to write errors force: Force overwriting refs """ # Open the repo with open_repo_closing(repo) as r: if refspecs is None: refspecs = [active_branch(r)] (remote_name, remote_location) = get_remote_repo(r, remote_location) # Get the client and path client, path = get_transport_and_path( remote_location, config=r.get_config_stack(), **kwargs ) selected_refs = [] remote_changed_refs = {} def update_refs(refs): selected_refs.extend(parse_reftuples(r.refs, refs, refspecs, force=force)) new_refs = {} # TODO: Handle selected_refs == {None: None} for (lh, rh, force_ref) in selected_refs: if lh is None: new_refs[rh] = ZERO_SHA remote_changed_refs[rh] = None else: try: localsha = r.refs[lh] except KeyError: raise Error("No valid ref %s in local repository" % lh) if not force_ref and rh in refs: check_diverged(r, refs[rh], localsha) new_refs[rh] = localsha remote_changed_refs[rh] = localsha return new_refs err_encoding = getattr(errstream, "encoding", None) or DEFAULT_ENCODING remote_location = client.get_url(path) try: result = client.send_pack( path, update_refs, generate_pack_data=r.generate_pack_data, progress=errstream.write, ) except SendPackError as e: raise Error( "Push to " + remote_location + " failed -> " + e.args[0].decode(), inner=e, ) else: errstream.write( b"Push to " + remote_location.encode(err_encoding) + b" successful.\n" ) for ref, error in (result.ref_status or {}).items(): if error is not None: errstream.write( b"Push of ref %s failed: %s\n" % (ref, error.encode(err_encoding)) ) else: errstream.write(b"Ref %s updated\n" % ref) if remote_name is not None: _import_remote_refs(r.refs, remote_name, remote_changed_refs) def pull( repo, remote_location=None, refspecs=None, outstream=default_bytes_out_stream, errstream=default_bytes_err_stream, fast_forward=True, force=False, **kwargs ): """Pull from remote via dulwich.client Args: repo: Path to repository remote_location: Location of the remote refspec: refspecs to fetch outstream: A stream file to write to output errstream: A stream file to write to errors """ # Open the repo with open_repo_closing(repo) as r: (remote_name, remote_location) = get_remote_repo(r, remote_location) if refspecs is None: refspecs = [b"HEAD"] selected_refs = [] def determine_wants(remote_refs, **kwargs): selected_refs.extend( 
parse_reftuples(remote_refs, r.refs, refspecs, force=force) ) return [ remote_refs[lh] for (lh, rh, force_ref) in selected_refs if remote_refs[lh] not in r.object_store ] client, path = get_transport_and_path( remote_location, config=r.get_config_stack(), **kwargs ) fetch_result = client.fetch( path, r, progress=errstream.write, determine_wants=determine_wants ) for (lh, rh, force_ref) in selected_refs: try: check_diverged(r, r.refs[rh], fetch_result.refs[lh]) except DivergedBranches: if fast_forward: raise else: raise NotImplementedError("merge is not yet supported") r.refs[rh] = fetch_result.refs[lh] if selected_refs: r[b"HEAD"] = fetch_result.refs[selected_refs[0][1]] # Perform 'git checkout .' - syncs staged changes tree = r[b"HEAD"].tree r.reset_index(tree=tree) if remote_name is not None: _import_remote_refs(r.refs, remote_name, fetch_result.refs) def status(repo=".", ignored=False): """Returns staged, unstaged, and untracked changes relative to the HEAD. Args: repo: Path to repository or repository object ignored: Whether to include ignored files in `untracked` Returns: GitStatus tuple, staged - dict with lists of staged paths (diff index/HEAD) unstaged - list of unstaged paths (diff index/working-tree) untracked - list of untracked, un-ignored & non-.git paths """ with open_repo_closing(repo) as r: # 1. Get status of staged tracked_changes = get_tree_changes(r) # 2. Get status of unstaged index = r.open_index() normalizer = r.get_blob_normalizer() filter_callback = normalizer.checkin_normalize - unstaged_changes = list(get_unstaged_changes(index, r, filter_callback)) + unstaged_changes = list(get_unstaged_changes(index, r.path, filter_callback)) untracked_paths = get_untracked_paths( r.path, r.path, index, exclude_ignored=not ignored ) untracked_changes = list(untracked_paths) return GitStatus(tracked_changes, unstaged_changes, untracked_changes) def _walk_working_dir_paths(frompath, basepath, prune_dirnames=None): """Get path, is_dir for files in working dir from frompath Args: frompath: Path to begin walk basepath: Path to compare to prune_dirnames: Optional callback to prune dirnames during os.walk dirnames will be set to result of prune_dirnames(dirpath, dirnames) """ for dirpath, dirnames, filenames in os.walk(frompath): # Skip .git and below. if ".git" in dirnames: dirnames.remove(".git") if dirpath != basepath: continue if ".git" in filenames: filenames.remove(".git") if dirpath != basepath: continue if dirpath != frompath: yield dirpath, True for filename in filenames: filepath = os.path.join(dirpath, filename) yield filepath, False if prune_dirnames: dirnames[:] = prune_dirnames(dirpath, dirnames) def get_untracked_paths(frompath, basepath, index, exclude_ignored=False): """Get untracked paths. Args: frompath: Path to walk basepath: Path to compare to index: Index to check against exclude_ignored: Whether to exclude ignored paths Note: ignored directories will never be walked for performance reasons. 
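# Example (illustrative only, not part of this patch): pushing, pulling and
# inspecting working-tree state with the helpers above, assuming a remote named
# "origin" is already configured; the repository path and branch are hypothetical.
from dulwich import porcelain

porcelain.push("./myrepo", "origin", [b"master"])
porcelain.pull("./myrepo", "origin", [b"master"])
staged, unstaged, untracked = porcelain.status("./myrepo")
print(staged["modify"], unstaged, untracked)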
If exclude_ignored is False, only the path to an ignored directory will be yielded, no files inside the directory will be returned """ with open_repo_closing(basepath) as r: ignore_manager = IgnoreFilterManager.from_repo(r) ignored_dirs = [] def prune_dirnames(dirpath, dirnames): for i in range(len(dirnames) - 1, -1, -1): path = os.path.join(dirpath, dirnames[i]) ip = os.path.join(os.path.relpath(path, basepath), "") if ignore_manager.is_ignored(ip): if not exclude_ignored: ignored_dirs.append( os.path.join(os.path.relpath(path, frompath), "") ) del dirnames[i] return dirnames for ap, is_dir in _walk_working_dir_paths( frompath, basepath, prune_dirnames=prune_dirnames ): if not is_dir: ip = path_to_tree_path(basepath, ap) if ip not in index: if ( not exclude_ignored or not ignore_manager.is_ignored( os.path.relpath(ap, basepath) ) ): yield os.path.relpath(ap, frompath) yield from ignored_dirs def get_tree_changes(repo): """Return add/delete/modify changes to tree by comparing index to HEAD. Args: repo: repo path or object Returns: dict with lists for each type of change """ with open_repo_closing(repo) as r: index = r.open_index() # Compares the Index to the HEAD & determines changes # Iterate through the changes and report add/delete/modify # TODO: call out to dulwich.diff_tree somehow. tracked_changes = { "add": [], "delete": [], "modify": [], } try: tree_id = r[b"HEAD"].tree except KeyError: tree_id = None for change in index.changes_from_tree(r.object_store, tree_id): if not change[0][0]: tracked_changes["add"].append(change[0][1]) elif not change[0][1]: tracked_changes["delete"].append(change[0][0]) elif change[0][0] == change[0][1]: tracked_changes["modify"].append(change[0][0]) else: raise NotImplementedError("git mv ops not yet supported") return tracked_changes def daemon(path=".", address=None, port=None): """Run a daemon serving Git requests over TCP/IP. Args: path: Path to the directory to serve. address: Optional address to listen on (defaults to ::) port: Optional port to listen on (defaults to TCP_GIT_PORT) """ # TODO(jelmer): Support git-daemon-export-ok and --export-all. backend = FileSystemBackend(path) server = TCPGitServer(backend, address, port) server.serve_forever() def web_daemon(path=".", address=None, port=None): """Run a daemon serving Git requests over HTTP. Args: path: Path to the directory to serve address: Optional address to listen on (defaults to ::) port: Optional port to listen on (defaults to 80) """ from dulwich.web import ( make_wsgi_chain, make_server, WSGIRequestHandlerLogger, WSGIServerLogger, ) backend = FileSystemBackend(path) app = make_wsgi_chain(backend) server = make_server( address, port, app, handler_class=WSGIRequestHandlerLogger, server_class=WSGIServerLogger, ) server.serve_forever() def upload_pack(path=".", inf=None, outf=None): """Upload a pack file after negotiating its contents using smart protocol. Args: path: Path to the repository inf: Input stream to communicate with client outf: Output stream to communicate with client """ if outf is None: outf = getattr(sys.stdout, "buffer", sys.stdout) if inf is None: inf = getattr(sys.stdin, "buffer", sys.stdin) path = os.path.expanduser(path) backend = FileSystemBackend(path) def send_fn(data): outf.write(data) outf.flush() proto = Protocol(inf.read, send_fn) handler = UploadPackHandler(backend, [path], proto) # FIXME: Catch exceptions and write a single-line summary to outf. 
handler.handle() return 0 def receive_pack(path=".", inf=None, outf=None): """Receive a pack file after negotiating its contents using smart protocol. Args: path: Path to the repository inf: Input stream to communicate with client outf: Output stream to communicate with client """ if outf is None: outf = getattr(sys.stdout, "buffer", sys.stdout) if inf is None: inf = getattr(sys.stdin, "buffer", sys.stdin) path = os.path.expanduser(path) backend = FileSystemBackend(path) def send_fn(data): outf.write(data) outf.flush() proto = Protocol(inf.read, send_fn) handler = ReceivePackHandler(backend, [path], proto) # FIXME: Catch exceptions and write a single-line summary to outf. handler.handle() return 0 def _make_branch_ref(name): if getattr(name, "encode", None): name = name.encode(DEFAULT_ENCODING) return LOCAL_BRANCH_PREFIX + name def _make_tag_ref(name): if getattr(name, "encode", None): name = name.encode(DEFAULT_ENCODING) return b"refs/tags/" + name def branch_delete(repo, name): """Delete a branch. Args: repo: Path to the repository name: Name of the branch """ with open_repo_closing(repo) as r: if isinstance(name, list): names = name else: names = [name] for name in names: del r.refs[_make_branch_ref(name)] def branch_create(repo, name, objectish=None, force=False): """Create a branch. Args: repo: Path to the repository name: Name of the new branch objectish: Target object to point new branch at (defaults to HEAD) force: Force creation of branch, even if it already exists """ with open_repo_closing(repo) as r: if objectish is None: objectish = "HEAD" object = parse_object(r, objectish) refname = _make_branch_ref(name) ref_message = b"branch: Created from " + objectish.encode("utf-8") if force: r.refs.set_if_equals(refname, None, object.id, message=ref_message) else: if not r.refs.add_if_new(refname, object.id, message=ref_message): raise Error("Branch with name %s already exists." % name) def branch_list(repo): """List all branches. Args: repo: Path to the repository """ with open_repo_closing(repo) as r: return r.refs.keys(base=LOCAL_BRANCH_PREFIX) def active_branch(repo): """Return the active branch in the repository, if any. Args: repo: Repository to open Returns: branch name Raises: KeyError: if the repository does not have a working tree IndexError: if HEAD is floating """ with open_repo_closing(repo) as r: active_ref = r.refs.follow(b"HEAD")[0][1] if not active_ref.startswith(LOCAL_BRANCH_PREFIX): raise ValueError(active_ref) return active_ref[len(LOCAL_BRANCH_PREFIX) :] def get_branch_remote(repo): """Return the active branch's remote name, if any. 
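# Example (illustrative only, not part of this patch): basic branch manipulation
# with the helpers above; the repository path and branch name are hypothetical.
from dulwich import porcelain

porcelain.branch_create("./myrepo", "feature-x")   # new branch at HEAD
print(porcelain.branch_list("./myrepo"))           # branch names under refs/heads/
print(porcelain.active_branch("./myrepo"))         # e.g. b'master'
porcelain.branch_delete("./myrepo", "feature-x")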
    Args:
      repo: Repository to open
    Returns:
      remote name
    Raises:
      KeyError: if the repository does not have a working tree
    """
    with open_repo_closing(repo) as r:
        branch_name = active_branch(r.path)
        config = r.get_config()
        try:
            remote_name = config.get((b"branch", branch_name), b"remote")
        except KeyError:
            remote_name = b"origin"
    return remote_name


def _import_remote_refs(
    refs_container: RefsContainer,
    remote_name: str,
    refs: Dict[str, str],
    message: Optional[bytes] = None,
    prune: bool = False,
    prune_tags: bool = False,
):
    stripped_refs = strip_peeled_refs(refs)
    branches = {
        n[len(LOCAL_BRANCH_PREFIX) :]: v
        for (n, v) in stripped_refs.items()
        if n.startswith(LOCAL_BRANCH_PREFIX)
    }
    refs_container.import_refs(
        b"refs/remotes/" + remote_name.encode(),
        branches,
        message=message,
        prune=prune,
    )
    tags = {
        n[len(b"refs/tags/") :]: v
        for (n, v) in stripped_refs.items()
        if n.startswith(b"refs/tags/") and not n.endswith(ANNOTATED_TAG_SUFFIX)
    }
    refs_container.import_refs(b"refs/tags", tags, message=message, prune=prune_tags)


def fetch(
    repo,
    remote_location=None,
    outstream=sys.stdout,
    errstream=default_bytes_err_stream,
    message=None,
    depth=None,
    prune=False,
    prune_tags=False,
    force=False,
    **kwargs
):
    """Fetch objects from a remote server.

    Args:
      repo: Path to the repository
      remote_location: String identifying a remote server
      outstream: Output stream (defaults to stdout)
      errstream: Error stream (defaults to stderr)
      message: Reflog message (defaults to b"fetch: from ")
      depth: Depth to fetch at
      prune: Prune remote removed refs
      prune_tags: Prune remote removed tags
    Returns:
      Dictionary with refs on the remote
    """
    with open_repo_closing(repo) as r:
        (remote_name, remote_location) = get_remote_repo(r, remote_location)
        if message is None:
            message = b"fetch: from " + remote_location.encode("utf-8")
        client, path = get_transport_and_path(
            remote_location, config=r.get_config_stack(), **kwargs
        )
        fetch_result = client.fetch(path, r, progress=errstream.write, depth=depth)
        if remote_name is not None:
            _import_remote_refs(
                r.refs,
                remote_name,
                fetch_result.refs,
                message,
                prune=prune,
                prune_tags=prune_tags,
            )
    return fetch_result


def ls_remote(remote, config=None, **kwargs):
    """List the refs in a remote.

    Args:
      remote: Remote repository location
      config: Configuration to use
    Returns:
      Dictionary with remote refs
    """
    if config is None:
        config = StackedConfig.default()
    client, host_path = get_transport_and_path(remote, config=config, **kwargs)
    return client.get_refs(host_path)


def repack(repo):
    """Repack loose files in a repository.

    Currently this only packs loose objects.

    Args:
      repo: Path to the repository
    """
    with open_repo_closing(repo) as r:
        r.object_store.pack_loose_objects()


def pack_objects(repo, object_ids, packf, idxf, delta_window_size=None):
    """Pack objects into a file.

    Args:
      repo: Path to the repository
      object_ids: List of object ids to write
      packf: File-like object to write to
      idxf: File-like object to write to (can be None)
    """
    with open_repo_closing(repo) as r:
        entries, data_sum = write_pack_objects(
            packf,
            r.object_store.iter_shas((oid, None) for oid in object_ids),
            delta_window_size=delta_window_size,
        )
    if idxf is not None:
        entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
        write_pack_index(idxf, entries, data_sum)


def ls_tree(
    repo,
    treeish=b"HEAD",
    outstream=sys.stdout,
    recursive=False,
    name_only=False,
):
    """List contents of a tree.
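# Example (illustrative only, not part of this patch): listing the refs
# advertised by a remote and fetching from it; the URL and repository path are
# hypothetical.
from dulwich import porcelain

refs = porcelain.ls_remote("https://example.com/repo.git")
print(sorted(refs))
result = porcelain.fetch("./myrepo", "https://example.com/repo.git")
print(result.refs.get(b"HEAD"))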
    Args:
      repo: Path to the repository
      treeish: Tree id to list
      outstream: Output stream (defaults to stdout)
      recursive: Whether to recursively list files
      name_only: Only print item name
    """

    def list_tree(store, treeid, base):
        for (name, mode, sha) in store[treeid].iteritems():
            if base:
                name = posixpath.join(base, name)
            if name_only:
                outstream.write(name + b"\n")
            else:
                outstream.write(pretty_format_tree_entry(name, mode, sha))
            if stat.S_ISDIR(mode) and recursive:
                list_tree(store, sha, name)

    with open_repo_closing(repo) as r:
        tree = parse_tree(r, treeish)
        list_tree(r.object_store, tree.id, "")


def remote_add(repo, name, url):
    """Add a remote.

    Args:
      repo: Path to the repository
      name: Remote name
      url: Remote URL
    """
    if not isinstance(name, bytes):
        name = name.encode(DEFAULT_ENCODING)
    if not isinstance(url, bytes):
        url = url.encode(DEFAULT_ENCODING)
    with open_repo_closing(repo) as r:
        c = r.get_config()
        section = (b"remote", name)
        if c.has_section(section):
            raise RemoteExists(section)
        c.set(section, b"url", url)
        c.write_to_path()


def check_ignore(repo, paths, no_index=False):
    """Debug gitignore files.

    Args:
      repo: Path to the repository
      paths: List of paths to check
      no_index: Don't check index
    Returns:
      List of ignored files
    """
    with open_repo_closing(repo) as r:
        index = r.open_index()
        ignore_manager = IgnoreFilterManager.from_repo(r)
        for path in paths:
            if not no_index and path_to_tree_path(r.path, path) in index:
                continue
            if os.path.isabs(path):
                path = os.path.relpath(path, r.path)
            if ignore_manager.is_ignored(path):
                yield path


def update_head(repo, target, detached=False, new_branch=None):
    """Update HEAD to point at a new branch/commit.

    Note that this does not actually update the working tree.

    Args:
      repo: Path to the repository
      target: Branch or committish to switch to
      detached: Create a detached head
      new_branch: New branch to create
    """
    with open_repo_closing(repo) as r:
        if new_branch is not None:
            to_set = _make_branch_ref(new_branch)
        else:
            to_set = b"HEAD"
        if detached:
            # TODO(jelmer): Provide some way so that the actual ref gets
            # updated rather than what it points to, so the delete isn't
            # necessary.
            del r.refs[to_set]
            r.refs[to_set] = parse_commit(r, target).id
        else:
            r.refs.set_symbolic_ref(to_set, parse_ref(r, target))
        if new_branch is not None:
            r.refs.set_symbolic_ref(b"HEAD", to_set)


def check_mailmap(repo, contact):
    """Check canonical name and email of contact.

    Args:
      repo: Path to the repository
      contact: Contact name and/or email
    Returns:
      Canonical contact data
    """
    with open_repo_closing(repo) as r:
        from dulwich.mailmap import Mailmap
        try:
            mailmap = Mailmap.from_path(os.path.join(r.path, ".mailmap"))
        except FileNotFoundError:
            mailmap = Mailmap()
        return mailmap.lookup(contact)


def fsck(repo):
    """Check a repository.
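# Example (illustrative only, not part of this patch): listing a tree, querying
# ignore rules and moving HEAD with the helpers above; paths and the branch name
# are hypothetical.
from dulwich import porcelain

porcelain.ls_tree("./myrepo", b"HEAD", recursive=True)               # prints to stdout
print(list(porcelain.check_ignore("./myrepo", ["build/output.o"])))
porcelain.update_head("./myrepo", "develop")                          # leaves the worktree alone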
    Args:
      repo: A path to the repository
    Returns:
      Iterator over errors/warnings
    """
    with open_repo_closing(repo) as r:
        # TODO(jelmer): check pack files
        # TODO(jelmer): check graph
        # TODO(jelmer): check refs
        for sha in r.object_store:
            o = r.object_store[sha]
            try:
                o.check()
            except Exception as e:
                yield (sha, e)


def stash_list(repo):
    """List all stashes in a repository."""
    with open_repo_closing(repo) as r:
        from dulwich.stash import Stash
        stash = Stash.from_repo(r)
        return enumerate(list(stash.stashes()))


def stash_push(repo):
    """Push a new stash onto the stack."""
    with open_repo_closing(repo) as r:
        from dulwich.stash import Stash
        stash = Stash.from_repo(r)
        stash.push()


def stash_pop(repo):
    """Pop a new stash from the stack."""
    with open_repo_closing(repo) as r:
        from dulwich.stash import Stash
        stash = Stash.from_repo(r)
        stash.pop()


def ls_files(repo):
    """List all files in an index."""
    with open_repo_closing(repo) as r:
        return sorted(r.open_index())


def describe(repo):
    """Describe the repository version.

    Args:
      repo: git repository root
    Returns:
      a string description of the current git revision

    Examples: "gabcdefh", "v0.1" or "v0.1-5-gabcdefh".
    """
    # Get the repository
    with open_repo_closing(repo) as r:
        # Get a list of all tags
        refs = r.get_refs()
        tags = {}
        for key, value in refs.items():
            key = key.decode()
            obj = r.get_object(value)
            if u"tags" not in key:
                continue

            _, tag = key.rsplit(u"/", 1)

            try:
                commit = obj.object
            except AttributeError:
                continue
            else:
                commit = r.get_object(commit[1])
            tags[tag] = [
                datetime.datetime(*time.gmtime(commit.commit_time)[:6]),
                commit.id.decode("ascii"),
            ]

        sorted_tags = sorted(tags.items(), key=lambda tag: tag[1][0], reverse=True)

        # If there are no tags, return the current commit
        if len(sorted_tags) == 0:
            return "g{}".format(r[r.head()].id.decode("ascii")[:7])

        # We're now 0 commits from the top
        commit_count = 0

        # Get the latest commit
        latest_commit = r[r.head()]

        # Walk through all commits
        walker = r.get_walker()
        for entry in walker:
            # Check if tag
            commit_id = entry.commit.id.decode("ascii")
            for tag in sorted_tags:
                tag_name = tag[0]
                tag_commit = tag[1][1]
                if commit_id == tag_commit:
                    if commit_count == 0:
                        return tag_name
                    else:
                        return "{}-{}-g{}".format(
                            tag_name,
                            commit_count,
                            latest_commit.id.decode("ascii")[:7],
                        )

            commit_count += 1

        # Return plain commit if no parent tag can be found
        return "g{}".format(latest_commit.id.decode("ascii")[:7])


def get_object_by_path(repo, path, committish=None):
    """Get an object by path.

    Args:
      repo: A path to the repository
      path: Path to look up
      committish: Commit to look up path in
    Returns:
      A `ShaFile` object
    """
    if committish is None:
        committish = "HEAD"
    # Get the repository
    with open_repo_closing(repo) as r:
        commit = parse_commit(r, committish)
        base_tree = commit.tree
        if not isinstance(path, bytes):
            path = commit_encode(commit, path)
        (mode, sha) = tree_lookup_path(r.object_store.__getitem__, base_tree, path)
        return r[sha]


def write_tree(repo):
    """Write a tree object from the index.
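# Example (illustrative only, not part of this patch): describe, path lookup and
# tree writing with the helpers above; the repository path and file name are
# hypothetical.
from dulwich import porcelain

print(porcelain.describe("./myrepo"))                   # e.g. "v0.2-3-g1a2b3c4"
blob = porcelain.get_object_by_path("./myrepo", "README.md")
print(blob.data[:40])
print(porcelain.write_tree("./myrepo"))                 # id of the tree built from the index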
Args: repo: Repository for which to write tree Returns: tree id for the tree that was written """ with open_repo_closing(repo) as r: return r.open_index().commit(r.object_store) diff --git a/dulwich/tests/test_index.py b/dulwich/tests/test_index.py index 9a91cb93..a187bd26 100644 --- a/dulwich/tests/test_index.py +++ b/dulwich/tests/test_index.py @@ -1,841 +1,841 @@ # -*- coding: utf-8 -*- # test_index.py -- Tests for the git index # encoding: utf-8 # Copyright (C) 2008-2009 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Tests for the index.""" from io import BytesIO import os import shutil import stat import struct import sys import tempfile import warnings from dulwich.index import ( Index, build_index_from_tree, cleanup_mode, commit_tree, get_unstaged_changes, index_entry_from_stat, read_index, read_index_dict, validate_path_element_default, validate_path_element_ntfs, write_cache_time, write_index, write_index_dict, _tree_to_fs_path, _fs_to_tree_path, IndexEntry, ) from dulwich.object_store import ( MemoryObjectStore, ) from dulwich.objects import ( Blob, Commit, Tree, S_IFGITLINK, ) from dulwich.repo import Repo from dulwich.tests import ( TestCase, skipIf, ) from dulwich.tests.utils import ( setup_warning_catcher, ) def can_symlink(): """Return whether running process can create symlinks.""" if sys.platform != "win32": # Platforms other than Windows should allow symlinks without issues. return True if not hasattr(os, "symlink"): # Older Python versions do not have `os.symlink` on Windows. 
return False test_source = tempfile.mkdtemp() test_target = test_source + "can_symlink" try: os.symlink(test_source, test_target) except (NotImplementedError, OSError): return False return True class IndexTestCase(TestCase): datadir = os.path.join(os.path.dirname(__file__), "data/indexes") def get_simple_index(self, name): return Index(os.path.join(self.datadir, name)) class SimpleIndexTestCase(IndexTestCase): def test_len(self): self.assertEqual(1, len(self.get_simple_index("index"))) def test_iter(self): self.assertEqual([b"bla"], list(self.get_simple_index("index"))) def test_iterobjects(self): self.assertEqual( [(b"bla", b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 33188)], list(self.get_simple_index("index").iterobjects()), ) def test_iterblobs(self): warnings.simplefilter("always", UserWarning) self.addCleanup(warnings.resetwarnings) warnings_list, restore_warnings = setup_warning_catcher() self.addCleanup(restore_warnings) self.assertEqual( [(b"bla", b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 33188)], list(self.get_simple_index("index").iterblobs()), ) expected_warning = PendingDeprecationWarning("Use iterobjects() instead.") for w in warnings_list: if type(w) == type(expected_warning) and w.args == expected_warning.args: break else: raise AssertionError( "Expected warning %r not in %r" % (expected_warning, warnings_list) ) def test_getitem(self): self.assertEqual( ( (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 0, 0, ), self.get_simple_index("index")[b"bla"], ) def test_empty(self): i = self.get_simple_index("notanindex") self.assertEqual(0, len(i)) self.assertFalse(os.path.exists(i._filename)) def test_against_empty_tree(self): i = self.get_simple_index("index") changes = list(i.changes_from_tree(MemoryObjectStore(), None)) self.assertEqual(1, len(changes)) (oldname, newname), (oldmode, newmode), (oldsha, newsha) = changes[0] self.assertEqual(b"bla", newname) self.assertEqual(b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", newsha) class SimpleIndexWriterTestCase(IndexTestCase): def setUp(self): IndexTestCase.setUp(self) self.tempdir = tempfile.mkdtemp() def tearDown(self): IndexTestCase.tearDown(self) shutil.rmtree(self.tempdir) def test_simple_write(self): entries = [ ( b"barbla", IndexEntry( (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 0, 0) ) ] filename = os.path.join(self.tempdir, "test-simple-write-index") with open(filename, "wb+") as x: write_index(x, entries) with open(filename, "rb") as x: self.assertEqual(entries, list(read_index(x))) class ReadIndexDictTests(IndexTestCase): def setUp(self): IndexTestCase.setUp(self) self.tempdir = tempfile.mkdtemp() def tearDown(self): IndexTestCase.tearDown(self) shutil.rmtree(self.tempdir) def test_simple_write(self): entries = { b"barbla": IndexEntry( (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 0, 0, ) } filename = os.path.join(self.tempdir, "test-simple-write-index") with open(filename, "wb+") as x: write_index_dict(x, entries) with open(filename, "rb") as x: self.assertEqual(entries, read_index_dict(x)) class CommitTreeTests(TestCase): def setUp(self): super(CommitTreeTests, self).setUp() self.store = MemoryObjectStore() def test_single_blob(self): blob = Blob() blob.data = b"foo" self.store.add_object(blob) blobs = [(b"bla", blob.id, stat.S_IFREG)] rootid = commit_tree(self.store, blobs) self.assertEqual(rootid, 
b"1a1e80437220f9312e855c37ac4398b68e5c1d50") self.assertEqual((stat.S_IFREG, blob.id), self.store[rootid][b"bla"]) self.assertEqual(set([rootid, blob.id]), set(self.store._data.keys())) def test_nested(self): blob = Blob() blob.data = b"foo" self.store.add_object(blob) blobs = [(b"bla/bar", blob.id, stat.S_IFREG)] rootid = commit_tree(self.store, blobs) self.assertEqual(rootid, b"d92b959b216ad0d044671981196781b3258fa537") dirid = self.store[rootid][b"bla"][1] self.assertEqual(dirid, b"c1a1deb9788150829579a8b4efa6311e7b638650") self.assertEqual((stat.S_IFDIR, dirid), self.store[rootid][b"bla"]) self.assertEqual((stat.S_IFREG, blob.id), self.store[dirid][b"bar"]) self.assertEqual(set([rootid, dirid, blob.id]), set(self.store._data.keys())) class CleanupModeTests(TestCase): def assertModeEqual(self, expected, got): self.assertEqual(expected, got, "%o != %o" % (expected, got)) def test_file(self): self.assertModeEqual(0o100644, cleanup_mode(0o100000)) def test_executable(self): self.assertModeEqual(0o100755, cleanup_mode(0o100711)) self.assertModeEqual(0o100755, cleanup_mode(0o100700)) def test_symlink(self): self.assertModeEqual(0o120000, cleanup_mode(0o120711)) def test_dir(self): self.assertModeEqual(0o040000, cleanup_mode(0o40531)) def test_submodule(self): self.assertModeEqual(0o160000, cleanup_mode(0o160744)) class WriteCacheTimeTests(TestCase): def test_write_string(self): f = BytesIO() self.assertRaises(TypeError, write_cache_time, f, "foo") def test_write_int(self): f = BytesIO() write_cache_time(f, 434343) self.assertEqual(struct.pack(">LL", 434343, 0), f.getvalue()) def test_write_tuple(self): f = BytesIO() write_cache_time(f, (434343, 21)) self.assertEqual(struct.pack(">LL", 434343, 21), f.getvalue()) def test_write_float(self): f = BytesIO() write_cache_time(f, 434343.000000021) self.assertEqual(struct.pack(">LL", 434343, 21), f.getvalue()) class IndexEntryFromStatTests(TestCase): def test_simple(self): st = os.stat_result( ( 16877, 131078, 64769, 154, 1000, 1000, 12288, 1323629595, 1324180496, 1324180496, ) ) entry = index_entry_from_stat(st, "22" * 20, 0) self.assertEqual( entry, IndexEntry( 1324180496, 1324180496, 64769, 131078, 16384, 1000, 1000, 12288, "2222222222222222222222222222222222222222", 0, None, ), ) def test_override_mode(self): st = os.stat_result( ( stat.S_IFREG + 0o644, 131078, 64769, 154, 1000, 1000, 12288, 1323629595, 1324180496, 1324180496, ) ) entry = index_entry_from_stat(st, "22" * 20, 0, mode=stat.S_IFREG + 0o755) self.assertEqual( entry, IndexEntry( 1324180496, 1324180496, 64769, 131078, 33261, 1000, 1000, 12288, "2222222222222222222222222222222222222222", 0, None, ), ) class BuildIndexTests(TestCase): def assertReasonableIndexEntry(self, index_entry, mode, filesize, sha): self.assertEqual(index_entry[4], mode) # mode self.assertEqual(index_entry[7], filesize) # filesize self.assertEqual(index_entry[8], sha) # sha def assertFileContents(self, path, contents, symlink=False): if symlink: self.assertEqual(os.readlink(path), contents) else: with open(path, "rb") as f: self.assertEqual(f.read(), contents) def test_empty(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: tree = Tree() repo.object_store.add_object(tree) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() self.assertEqual(len(index), 0) # Verify no files self.assertEqual([".git"], os.listdir(repo.path)) def test_git_dir(self): repo_dir = tempfile.mkdtemp() 
self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo filea = Blob.from_string(b"file a") filee = Blob.from_string(b"d") tree = Tree() tree[b".git/a"] = (stat.S_IFREG | 0o644, filea.id) tree[b"c/e"] = (stat.S_IFREG | 0o644, filee.id) repo.object_store.add_objects([(o, None) for o in [filea, filee, tree]]) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() self.assertEqual(len(index), 1) # filea apath = os.path.join(repo.path, ".git", "a") self.assertFalse(os.path.exists(apath)) # filee epath = os.path.join(repo.path, "c", "e") self.assertTrue(os.path.exists(epath)) self.assertReasonableIndexEntry( index[b"c/e"], stat.S_IFREG | 0o644, 1, filee.id ) self.assertFileContents(epath, b"d") def test_nonempty(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo filea = Blob.from_string(b"file a") fileb = Blob.from_string(b"file b") filed = Blob.from_string(b"file d") tree = Tree() tree[b"a"] = (stat.S_IFREG | 0o644, filea.id) tree[b"b"] = (stat.S_IFREG | 0o644, fileb.id) tree[b"c/d"] = (stat.S_IFREG | 0o644, filed.id) repo.object_store.add_objects( [(o, None) for o in [filea, fileb, filed, tree]] ) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() self.assertEqual(len(index), 3) # filea apath = os.path.join(repo.path, "a") self.assertTrue(os.path.exists(apath)) self.assertReasonableIndexEntry( index[b"a"], stat.S_IFREG | 0o644, 6, filea.id ) self.assertFileContents(apath, b"file a") # fileb bpath = os.path.join(repo.path, "b") self.assertTrue(os.path.exists(bpath)) self.assertReasonableIndexEntry( index[b"b"], stat.S_IFREG | 0o644, 6, fileb.id ) self.assertFileContents(bpath, b"file b") # filed dpath = os.path.join(repo.path, "c", "d") self.assertTrue(os.path.exists(dpath)) self.assertReasonableIndexEntry( index[b"c/d"], stat.S_IFREG | 0o644, 6, filed.id ) self.assertFileContents(dpath, b"file d") # Verify no extra files self.assertEqual([".git", "a", "b", "c"], sorted(os.listdir(repo.path))) self.assertEqual(["d"], sorted(os.listdir(os.path.join(repo.path, "c")))) @skipIf(not getattr(os, "sync", None), "Requires sync support") def test_norewrite(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo filea = Blob.from_string(b"file a") filea_path = os.path.join(repo_dir, "a") tree = Tree() tree[b"a"] = (stat.S_IFREG | 0o644, filea.id) repo.object_store.add_objects([(o, None) for o in [filea, tree]]) # First Write build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Use sync as metadata can be cached on some FS os.sync() mtime = os.stat(filea_path).st_mtime # Test Rewrite build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) os.sync() self.assertEqual(mtime, os.stat(filea_path).st_mtime) # Modify content with open(filea_path, "wb") as fh: fh.write(b"test a") os.sync() mtime = os.stat(filea_path).st_mtime # Test rewrite build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) os.sync() with open(filea_path, "rb") as fh: self.assertEqual(b"file a", fh.read()) @skipIf(not can_symlink(), "Requires symlink support") def test_symlink(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo filed = 
Blob.from_string(b"file d") filee = Blob.from_string(b"d") tree = Tree() tree[b"c/d"] = (stat.S_IFREG | 0o644, filed.id) tree[b"c/e"] = (stat.S_IFLNK, filee.id) # symlink repo.object_store.add_objects([(o, None) for o in [filed, filee, tree]]) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() # symlink to d epath = os.path.join(repo.path, "c", "e") self.assertTrue(os.path.exists(epath)) self.assertReasonableIndexEntry( index[b"c/e"], stat.S_IFLNK, 0 if sys.platform == "win32" else 1, filee.id, ) self.assertFileContents(epath, "d", symlink=True) def test_no_decode_encode(self): repo_dir = tempfile.mkdtemp() repo_dir_bytes = os.fsencode(repo_dir) self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo file = Blob.from_string(b"foo") tree = Tree() latin1_name = u"À".encode("latin1") latin1_path = os.path.join(repo_dir_bytes, latin1_name) utf8_name = u"À".encode("utf8") utf8_path = os.path.join(repo_dir_bytes, utf8_name) tree[latin1_name] = (stat.S_IFREG | 0o644, file.id) tree[utf8_name] = (stat.S_IFREG | 0o644, file.id) repo.object_store.add_objects([(o, None) for o in [file, tree]]) try: build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) except OSError as e: if e.errno == 92 and sys.platform == "darwin": # Our filename isn't supported by the platform :( self.skipTest("can not write filename %r" % e.filename) else: raise except UnicodeDecodeError: # This happens e.g. with python3.6 on Windows. # It implicitly decodes using utf8, which doesn't work. self.skipTest("can not implicitly convert as utf8") # Verify index entries index = repo.open_index() self.assertIn(latin1_name, index) self.assertIn(utf8_name, index) self.assertTrue(os.path.exists(latin1_path)) self.assertTrue(os.path.exists(utf8_path)) def test_git_submodule(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: filea = Blob.from_string(b"file alalala") subtree = Tree() subtree[b"a"] = (stat.S_IFREG | 0o644, filea.id) c = Commit() c.tree = subtree.id c.committer = c.author = b"Somebody " c.commit_time = c.author_time = 42342 c.commit_timezone = c.author_timezone = 0 c.parents = [] c.message = b"Subcommit" tree = Tree() tree[b"c"] = (S_IFGITLINK, c.id) repo.object_store.add_objects([(o, None) for o in [tree]]) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() self.assertEqual(len(index), 1) # filea apath = os.path.join(repo.path, "c/a") self.assertFalse(os.path.exists(apath)) # dir c cpath = os.path.join(repo.path, "c") self.assertTrue(os.path.isdir(cpath)) self.assertEqual(index[b"c"][4], S_IFGITLINK) # mode self.assertEqual(index[b"c"][8], c.id) # sha def test_git_submodule_exists(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: filea = Blob.from_string(b"file alalala") subtree = Tree() subtree[b"a"] = (stat.S_IFREG | 0o644, filea.id) c = Commit() c.tree = subtree.id c.committer = c.author = b"Somebody " c.commit_time = c.author_time = 42342 c.commit_timezone = c.author_timezone = 0 c.parents = [] c.message = b"Subcommit" tree = Tree() tree[b"c"] = (S_IFGITLINK, c.id) os.mkdir(os.path.join(repo_dir, "c")) repo.object_store.add_objects([(o, None) for o in [tree]]) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = 
repo.open_index() self.assertEqual(len(index), 1) # filea apath = os.path.join(repo.path, "c/a") self.assertFalse(os.path.exists(apath)) # dir c cpath = os.path.join(repo.path, "c") self.assertTrue(os.path.isdir(cpath)) self.assertEqual(index[b"c"][4], S_IFGITLINK) # mode self.assertEqual(index[b"c"][8], c.id) # sha class GetUnstagedChangesTests(TestCase): def test_get_unstaged_changes(self): """Unit test for get_unstaged_changes.""" repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Commit a dummy file then modify it foo1_fullpath = os.path.join(repo_dir, "foo1") with open(foo1_fullpath, "wb") as f: f.write(b"origstuff") foo2_fullpath = os.path.join(repo_dir, "foo2") with open(foo2_fullpath, "wb") as f: f.write(b"origstuff") repo.stage(["foo1", "foo2"]) repo.do_commit( b"test status", author=b"author ", committer=b"committer ", ) with open(foo1_fullpath, "wb") as f: f.write(b"newstuff") # modify access and modify time of path os.utime(foo1_fullpath, (0, 0)) - changes = get_unstaged_changes(repo.open_index(), repo) + changes = get_unstaged_changes(repo.open_index(), repo_dir) self.assertEqual(list(changes), [b"foo1"]) def test_get_unstaged_deleted_changes(self): """Unit test for get_unstaged_changes.""" repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Commit a dummy file then remove it foo1_fullpath = os.path.join(repo_dir, "foo1") with open(foo1_fullpath, "wb") as f: f.write(b"origstuff") repo.stage(["foo1"]) repo.do_commit( b"test status", author=b"author ", committer=b"committer ", ) os.unlink(foo1_fullpath) - changes = get_unstaged_changes(repo.open_index(), repo) + changes = get_unstaged_changes(repo.open_index(), repo_dir) self.assertEqual(list(changes), [b"foo1"]) def test_get_unstaged_changes_removed_replaced_by_directory(self): """Unit test for get_unstaged_changes.""" repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Commit a dummy file then modify it foo1_fullpath = os.path.join(repo_dir, "foo1") with open(foo1_fullpath, "wb") as f: f.write(b"origstuff") repo.stage(["foo1"]) repo.do_commit( b"test status", author=b"author ", committer=b"committer ", ) os.remove(foo1_fullpath) os.mkdir(foo1_fullpath) - changes = get_unstaged_changes(repo.open_index(), repo) + changes = get_unstaged_changes(repo.open_index(), repo_dir) self.assertEqual(list(changes), [b"foo1"]) @skipIf(not can_symlink(), "Requires symlink support") def test_get_unstaged_changes_removed_replaced_by_link(self): """Unit test for get_unstaged_changes.""" repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Commit a dummy file then modify it foo1_fullpath = os.path.join(repo_dir, "foo1") with open(foo1_fullpath, "wb") as f: f.write(b"origstuff") repo.stage(["foo1"]) repo.do_commit( b"test status", author=b"author ", committer=b"committer ", ) os.remove(foo1_fullpath) os.symlink(os.path.dirname(foo1_fullpath), foo1_fullpath) - changes = get_unstaged_changes(repo.open_index(), repo) + changes = get_unstaged_changes(repo.open_index(), repo_dir) self.assertEqual(list(changes), [b"foo1"]) class TestValidatePathElement(TestCase): def test_default(self): self.assertTrue(validate_path_element_default(b"bla")) self.assertTrue(validate_path_element_default(b".bla")) self.assertFalse(validate_path_element_default(b".git")) self.assertFalse(validate_path_element_default(b".giT")) 
self.assertFalse(validate_path_element_default(b"..")) self.assertTrue(validate_path_element_default(b"git~1")) def test_ntfs(self): self.assertTrue(validate_path_element_ntfs(b"bla")) self.assertTrue(validate_path_element_ntfs(b".bla")) self.assertFalse(validate_path_element_ntfs(b".git")) self.assertFalse(validate_path_element_ntfs(b".giT")) self.assertFalse(validate_path_element_ntfs(b"..")) self.assertFalse(validate_path_element_ntfs(b"git~1")) class TestTreeFSPathConversion(TestCase): def test_tree_to_fs_path(self): tree_path = u"délwíçh/foo".encode("utf8") fs_path = _tree_to_fs_path(b"/prefix/path", tree_path) self.assertEqual( fs_path, os.fsencode(os.path.join(u"/prefix/path", u"délwíçh", u"foo")), ) def test_fs_to_tree_path_str(self): fs_path = os.path.join(os.path.join(u"délwíçh", u"foo")) tree_path = _fs_to_tree_path(fs_path) self.assertEqual(tree_path, u"délwíçh/foo".encode("utf-8")) def test_fs_to_tree_path_bytes(self): fs_path = os.path.join(os.fsencode(os.path.join(u"délwíçh", u"foo"))) tree_path = _fs_to_tree_path(fs_path) self.assertEqual(tree_path, u"délwíçh/foo".encode("utf-8"))
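# Example (illustrative only, not part of this patch): the call shape these tests
# exercise after the change above -- get_unstaged_changes() is given the
# working-tree path, not a Repo object; "./myrepo" is a hypothetical path.
from dulwich.index import get_unstaged_changes
from dulwich.repo import Repo

r = Repo("./myrepo")
modified = list(get_unstaged_changes(r.open_index(), r.path))
print(modified)  # tree paths (bytes) whose working-tree contents differ from the index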