diff --git a/dulwich/index.py b/dulwich/index.py index 4b464bde..3afb4363 100644 --- a/dulwich/index.py +++ b/dulwich/index.py @@ -1,990 +1,988 @@ # index.py -- File parser/writer for the git index file # Copyright (C) 2008-2013 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Parser for the git index file format.""" import collections import os import stat import struct import sys from typing import ( Any, BinaryIO, Callable, Dict, List, Optional, TYPE_CHECKING, Iterable, Iterator, Tuple, ) if TYPE_CHECKING: from dulwich.object_store import BaseObjectStore from dulwich.file import GitFile from dulwich.objects import ( Blob, S_IFGITLINK, S_ISGITLINK, Tree, hex_to_sha, sha_to_hex, ) from dulwich.pack import ( SHA1Reader, SHA1Writer, ) # TODO(jelmer): Switch to dataclass? IndexEntry = collections.namedtuple( "IndexEntry", [ "ctime", "mtime", "dev", "ino", "mode", "uid", "gid", "size", "sha", "flags", "extended_flags", ], ) # 2-bit stage (during merge) FLAG_STAGEMASK = 0x3000 # assume-valid FLAG_VALID = 0x8000 # extended flag (must be zero in version 2) FLAG_EXTENDED = 0x4000 # used by sparse checkout EXTENDED_FLAG_SKIP_WORKTREE = 0x4000 # used by "git add -N" EXTENDED_FLAG_INTEND_TO_ADD = 0x2000 DEFAULT_VERSION = 2 def pathsplit(path): """Split a /-delimited path into a directory part and a basename. Args: path: The path to split. Returns: Tuple with directory name and basename """ try: (dirname, basename) = path.rsplit(b"/", 1) except ValueError: return (b"", path) else: return (dirname, basename) def pathjoin(*args): """Join a /-delimited path.""" return b"/".join([p for p in args if p]) def read_cache_time(f): """Read a cache time. Args: f: File-like object to read from Returns: Tuple with seconds and nanoseconds """ return struct.unpack(">LL", f.read(8)) def write_cache_time(f, t): """Write a cache time. Args: f: File-like object to write to t: Time to write (as int, float or tuple with secs and nsecs) """ if isinstance(t, int): t = (t, 0) elif isinstance(t, float): (secs, nsecs) = divmod(t, 1.0) t = (int(secs), int(nsecs * 1000000000)) elif not isinstance(t, tuple): raise TypeError(t) f.write(struct.pack(">LL", *t)) def read_cache_entry(f, version: int) -> Tuple[str, IndexEntry]: """Read an entry from a cache file. 
Args: f: File-like object to read from Returns: tuple with: name, IndexEntry """ beginoffset = f.tell() ctime = read_cache_time(f) mtime = read_cache_time(f) ( dev, ino, mode, uid, gid, size, sha, flags, ) = struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2)) if flags & FLAG_EXTENDED: if version < 3: raise AssertionError( 'extended flag set in index with version < 3') extended_flags = struct.unpack(">H", f.read(2)) else: extended_flags = 0 name = f.read((flags & 0x0FFF)) # Padding: real_size = (f.tell() - beginoffset + 8) & ~7 f.read((beginoffset + real_size) - f.tell()) return ( name, IndexEntry( ctime, mtime, dev, ino, mode, uid, gid, size, sha_to_hex(sha), flags & ~0x0FFF, extended_flags, )) def write_cache_entry(f, name, entry, version=None): """Write an index entry to a file. Args: f: File object entry: IndexEntry to write, tuple with: """ beginoffset = f.tell() write_cache_time(f, entry.ctime) write_cache_time(f, entry.mtime) flags = len(name) | (entry.flags & ~0x0FFF) if entry.extended_flags: flags |= FLAG_EXTENDED if flags & FLAG_EXTENDED and version is not None and version < 3: raise AssertionError('unable to use extended flags in version < 3') f.write( struct.pack( b">LLLLLL20sH", entry.dev & 0xFFFFFFFF, entry.ino & 0xFFFFFFFF, entry.mode, entry.uid, entry.gid, entry.size, hex_to_sha(entry.sha), flags, ) ) if flags & FLAG_EXTENDED: f.write(struct.pack(b">H", entry.extended_flags)) f.write(name) real_size = (f.tell() - beginoffset + 8) & ~7 f.write(b"\0" * ((beginoffset + real_size) - f.tell())) def read_index(f: BinaryIO): """Read an index file, yielding the individual entries.""" header = f.read(4) if header != b"DIRC": raise AssertionError("Invalid index file header: %r" % header) (version, num_entries) = struct.unpack(b">LL", f.read(4 * 2)) assert version in (1, 2, 3), "index version is %r" % version for i in range(num_entries): yield read_cache_entry(f, version) def read_index_dict(f): """Read an index file and return it as a dictionary. Args: f: File object to read from """ ret = {} for name, entry in read_index(f): ret[name] = entry return ret def write_index(f: BinaryIO, entries: Iterable[Tuple[bytes, IndexEntry]], version: Optional[int] = None): """Write an index file. Args: f: File-like object to write to version: Version number to write entries: Iterable over the entries to write """ if version is None: version = DEFAULT_VERSION f.write(b"DIRC") f.write(struct.pack(b">LL", version, len(entries))) for name, entry in entries: write_cache_entry(f, name, entry, version) def write_index_dict( f: BinaryIO, entries: Dict[bytes, IndexEntry], version: Optional[int] = None, ) -> None: """Write an index file based on the contents of a dictionary.""" entries_list = [] for name in sorted(entries): entries_list.append((name, entries[name])) write_index(f, entries_list, version=version) def cleanup_mode(mode: int) -> int: """Cleanup a mode value. This will return a mode that can be stored in a tree object. Args: mode: Mode to clean up. Returns: mode """ if stat.S_ISLNK(mode): return stat.S_IFLNK elif stat.S_ISDIR(mode): return stat.S_IFDIR elif S_ISGITLINK(mode): return S_IFGITLINK ret = stat.S_IFREG | 0o644 if mode & 0o100: ret |= 0o111 return ret class Index(object): """A Git Index file.""" def __init__(self, filename): """Open an index file. 
Args: filename: Path to the index file """ self._filename = filename # TODO(jelmer): Store the version returned by read_index self._version = None self.clear() self.read() @property def path(self): return self._filename def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._filename) def write(self) -> None: """Write current contents of index to disk.""" f = GitFile(self._filename, "wb") try: f = SHA1Writer(f) write_index_dict(f, self._byname, version=self._version) finally: f.close() def read(self): """Read current contents of index from disk.""" if not os.path.exists(self._filename): return f = GitFile(self._filename, "rb") try: f = SHA1Reader(f) for name, entry in read_index(f): self[name] = entry # FIXME: Additional data? f.read(os.path.getsize(self._filename) - f.tell() - 20) f.check_sha() finally: f.close() def __len__(self) -> int: """Number of entries in this index file.""" return len(self._byname) def __getitem__(self, name: bytes) -> IndexEntry: """Retrieve entry by relative path. Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) """ return self._byname[name] def __iter__(self) -> Iterator[bytes]: """Iterate over the paths in this index.""" return iter(self._byname) def get_sha1(self, path: bytes) -> bytes: """Return the (git object) SHA1 for the object at a path.""" return self[path].sha def get_mode(self, path: bytes) -> int: """Return the POSIX file mode for the object at a path.""" return self[path].mode def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]: """Iterate over path, sha, mode tuples for use with commit_tree.""" for path in self: entry = self[path] yield path, entry.sha, cleanup_mode(entry.mode) def iterblobs(self): import warnings warnings.warn("Use iterobjects() instead.", PendingDeprecationWarning) return self.iterobjects() def clear(self): """Remove all contents from this index.""" self._byname = {} def __setitem__(self, name, x): assert isinstance(name, bytes) assert len(x) == len(IndexEntry._fields) # Remove the old entry if any self._byname[name] = IndexEntry(*x) def __delitem__(self, name): assert isinstance(name, bytes) del self._byname[name] def iteritems(self): return self._byname.items() def items(self): return self._byname.items() def update(self, entries): for name, value in entries.items(): self[name] = value def changes_from_tree(self, object_store, tree, want_unchanged=False): """Find the differences between the contents of this index and a tree. Args: object_store: Object store to use for retrieving tree contents tree: SHA1 of the root tree want_unchanged: Whether unchanged files should be reported Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) """ def lookup_entry(path): entry = self[path] return entry.sha, cleanup_mode(entry.mode) for (name, mode, sha) in changes_from_tree( self._byname.keys(), lookup_entry, object_store, tree, want_unchanged=want_unchanged, ): yield (name, mode, sha) def commit(self, object_store): """Create a new tree from an index. Args: object_store: Object store to save the tree in Returns: Root tree SHA """ return commit_tree(object_store, self.iterobjects()) def commit_tree( object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]] ) -> bytes: """Commit a new tree. Args: object_store: Object store to add trees to blobs: Iterable over blob path, sha, mode entries Returns: SHA1 of the created tree. 
""" trees = {b"": {}} # type: Dict[bytes, Any] def add_tree(path): if path in trees: return trees[path] dirname, basename = pathsplit(path) t = add_tree(dirname) assert isinstance(basename, bytes) newtree = {} t[basename] = newtree trees[path] = newtree return newtree for path, sha, mode in blobs: tree_path, basename = pathsplit(path) tree = add_tree(tree_path) tree[basename] = (mode, sha) def build_tree(path): tree = Tree() for basename, entry in trees[path].items(): if isinstance(entry, dict): mode = stat.S_IFDIR sha = build_tree(pathjoin(path, basename)) else: (mode, sha) = entry tree.add(basename, mode, sha) object_store.add_object(tree) return tree.id return build_tree(b"") def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes: """Create a new tree from an index. Args: object_store: Object store to save the tree in index: Index file Note: This function is deprecated, use index.commit() instead. Returns: Root tree sha. """ return commit_tree(object_store, index.iterobjects()) def changes_from_tree( names: Iterable[bytes], lookup_entry: Callable[[bytes], Tuple[bytes, int]], object_store: "BaseObjectStore", tree: Optional[bytes], want_unchanged=False, ) -> Iterable[ Tuple[ Tuple[Optional[bytes], Optional[bytes]], Tuple[Optional[int], Optional[int]], Tuple[Optional[bytes], Optional[bytes]], ] ]: """Find the differences between the contents of a tree and a working copy. Args: names: Iterable of names in the working copy lookup_entry: Function to lookup an entry in the working copy object_store: Object store to use for retrieving tree contents tree: SHA1 of the root tree, or None for an empty tree want_unchanged: Whether unchanged files should be reported Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) """ # TODO(jelmer): Support a include_trees option other_names = set(names) if tree is not None: for (name, mode, sha) in object_store.iter_tree_contents(tree): try: (other_sha, other_mode) = lookup_entry(name) except KeyError: # Was removed yield ((name, None), (mode, None), (sha, None)) else: other_names.remove(name) if want_unchanged or other_sha != sha or other_mode != mode: yield ((name, name), (mode, other_mode), (sha, other_sha)) # Mention added files for name in other_names: try: (other_sha, other_mode) = lookup_entry(name) except KeyError: pass else: yield ((None, name), (None, other_mode), (None, other_sha)) def index_entry_from_stat( stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None, extended_flags: Optional[int] =None ): """Create a new index entry from a stat value. Args: stat_val: POSIX stat_result instance hex_sha: Hex sha of the object flags: Index flags """ if mode is None: mode = cleanup_mode(stat_val.st_mode) return IndexEntry( stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev, stat_val.st_ino, mode, stat_val.st_uid, stat_val.st_gid, stat_val.st_size, hex_sha, flags, extended_flags ) def build_file_from_blob( blob, mode, target_path, honor_filemode=True, tree_encoding="utf-8" ): """Build a file or symlink on disk based on a Git object. Args: obj: The git object mode: File mode target_path: Path to write to honor_filemode: An optional flag to honor core.filemode setting in config file, default is core.filemode=True, change executable bit Returns: stat object for the file """ try: oldstat = os.lstat(target_path) except FileNotFoundError: oldstat = None contents = blob.as_raw_string() if stat.S_ISLNK(mode): # FIXME: This will fail on Windows. What should we do instead? 
if oldstat: os.unlink(target_path) if sys.platform == "win32": # os.readlink on Python3 on Windows requires a unicode string. contents = contents.decode(tree_encoding) target_path = target_path.decode(tree_encoding) os.symlink(contents, target_path) else: if oldstat is not None and oldstat.st_size == len(contents): with open(target_path, "rb") as f: if f.read() == contents: return oldstat with open(target_path, "wb") as f: # Write out file f.write(contents) if honor_filemode: os.chmod(target_path, mode) return os.lstat(target_path) INVALID_DOTNAMES = (b".git", b".", b"..", b"") def validate_path_element_default(element): return element.lower() not in INVALID_DOTNAMES def validate_path_element_ntfs(element): stripped = element.rstrip(b". ").lower() if stripped in INVALID_DOTNAMES: return False if stripped == b"git~1": return False return True def validate_path(path, element_validator=validate_path_element_default): """Default path validator that just checks for .git/.""" parts = path.split(b"/") for p in parts: if not element_validator(p): return False else: return True def build_index_from_tree( root_path, index_path, object_store, tree_id, honor_filemode=True, validate_path_element=validate_path_element_default, ): """Generate and materialize index from a tree Args: tree_id: Tree to materialize root_path: Target dir for materialized index files index_path: Target path for generated index object_store: Non-empty object store holding tree contents honor_filemode: An optional flag to honor core.filemode setting in config file, default is core.filemode=True, change executable bit validate_path_element: Function to validate path elements to check out; default just refuses .git and .. directories. Note: existing index is wiped and contents are not merged in a working dir. Suitable only for fresh clones. """ index = Index(index_path) if not isinstance(root_path, bytes): root_path = os.fsencode(root_path) for entry in object_store.iter_tree_contents(tree_id): if not validate_path(entry.path, validate_path_element): continue full_path = _tree_to_fs_path(root_path, entry.path) if not os.path.exists(os.path.dirname(full_path)): os.makedirs(os.path.dirname(full_path)) # TODO(jelmer): Merge new index into working tree if S_ISGITLINK(entry.mode): if not os.path.isdir(full_path): os.mkdir(full_path) st = os.lstat(full_path) # TODO(jelmer): record and return submodule paths else: obj = object_store[entry.sha] st = build_file_from_blob( obj, entry.mode, full_path, honor_filemode=honor_filemode ) # Add file to index if not honor_filemode or S_ISGITLINK(entry.mode): # we can not use tuple slicing to build a new tuple, # because on windows that will convert the times to # longs, which causes errors further along st_tuple = ( entry.mode, st.st_ino, st.st_dev, st.st_nlink, st.st_uid, st.st_gid, st.st_size, st.st_atime, st.st_mtime, st.st_ctime, ) st = st.__class__(st_tuple) index[entry.path] = index_entry_from_stat(st, entry.sha, 0) index.write() def blob_from_path_and_mode(fs_path, mode, tree_encoding="utf-8"): """Create a blob from a path and a stat object. Args: fs_path: Full file system path to file st: A stat object Returns: A `Blob` object """ assert isinstance(fs_path, bytes) blob = Blob() if stat.S_ISLNK(mode): if sys.platform == "win32": # os.readlink on Python3 on Windows requires a unicode string. 
fs_path = os.fsdecode(fs_path) blob.data = os.readlink(fs_path).encode(tree_encoding) else: blob.data = os.readlink(fs_path) else: with open(fs_path, "rb") as f: blob.data = f.read() return blob def blob_from_path_and_stat(fs_path, st, tree_encoding="utf-8"): """Create a blob from a path and a stat object. Args: fs_path: Full file system path to file st: A stat object Returns: A `Blob` object """ return blob_from_path_and_mode(fs_path, st.st_mode, tree_encoding) def read_submodule_head(path): """Read the head commit of a submodule. Args: path: path to the submodule Returns: HEAD sha, None if not a valid head/repository """ from dulwich.errors import NotGitRepository from dulwich.repo import Repo # Repo currently expects a "str", so decode if necessary. # TODO(jelmer): Perhaps move this into Repo() ? if not isinstance(path, str): path = os.fsdecode(path) try: repo = Repo(path) except NotGitRepository: return None try: return repo.head() except KeyError: return None def _has_directory_changed(tree_path, entry): """Check if a directory has changed after getting an error. When handling an error trying to create a blob from a path, call this function. It will check if the path is a directory. If it's a directory and a submodule, check the submodule head to see if it's has changed. If not, consider the file as changed as Git tracked a file and not a directory. Return true if the given path should be considered as changed and False otherwise or if the path is not a directory. """ # This is actually a directory if os.path.exists(os.path.join(tree_path, b".git")): # Submodule head = read_submodule_head(tree_path) if entry.sha != head: return True else: # The file was changed to a directory, so consider it removed. return True return False -def get_unstaged_changes(index: Index, repo, filter_blob_callback=None): +def get_unstaged_changes(index: Index, root_path, filter_blob_callback=None): """Walk through an index and check for differences against working tree. Args: index: index to check root_path: path in which to find files Returns: iterator over paths with unstaged changes """ - root_path = repo.path # For each entry in the index check the sha1 & ensure not staged if not isinstance(root_path, bytes): root_path = os.fsencode(root_path) for tree_path, entry in index.iteritems(): full_path = _tree_to_fs_path(root_path, tree_path) try: st = os.lstat(full_path) if stat.S_ISDIR(st.st_mode): if _has_directory_changed(tree_path, entry): yield tree_path continue if not stat.S_ISREG(st.st_mode) and not stat.S_ISLNK(st.st_mode): continue blob = blob_from_path_and_stat(full_path, st) if filter_blob_callback is not None: - # Check if the file is already in the index try: index[tree_path] new_file = False except KeyError: new_file = True blob = filter_blob_callback(blob, tree_path, new_file) except FileNotFoundError: # The file was removed, so we assume that counts as # different from whatever file used to exist. yield tree_path else: if blob.id != entry.sha: yield tree_path os_sep_bytes = os.sep.encode("ascii") def _tree_to_fs_path(root_path, tree_path: bytes): """Convert a git tree path to a file system path. Args: root_path: Root filesystem path tree_path: Git tree path as bytes Returns: File system path. """ assert isinstance(tree_path, bytes) if os_sep_bytes != b"/": sep_corrected_path = tree_path.replace(b"/", os_sep_bytes) else: sep_corrected_path = tree_path return os.path.join(root_path, sep_corrected_path) def _fs_to_tree_path(fs_path): """Convert a file system path to a git tree path. 
Args: fs_path: File system path. Returns: Git tree path as bytes """ if not isinstance(fs_path, bytes): fs_path_bytes = os.fsencode(fs_path) else: fs_path_bytes = fs_path if os_sep_bytes != b"/": tree_path = fs_path_bytes.replace(os_sep_bytes, b"/") else: tree_path = fs_path_bytes return tree_path def index_entry_from_path(path, object_store=None): """Create an index from a filesystem path. This returns an index value for files, symlinks and tree references. for directories and non-existant files it returns None Args: path: Path to create an index entry for object_store: Optional object store to save new blobs in Returns: An index entry; None for directories """ assert isinstance(path, bytes) st = os.lstat(path) if stat.S_ISDIR(st.st_mode): if os.path.exists(os.path.join(path, b".git")): head = read_submodule_head(path) if head is None: return None return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK) return None if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode): blob = blob_from_path_and_stat(path, st) if object_store is not None: object_store.add_object(blob) return index_entry_from_stat(st, blob.id, 0) return None def iter_fresh_entries( paths, root_path, object_store: Optional["BaseObjectStore"] = None ): """Iterate over current versions of index entries on disk. Args: paths: Paths to iterate over root_path: Root path to access from store: Optional store to save new blobs in Returns: Iterator over path, index_entry """ for path in paths: p = _tree_to_fs_path(root_path, path) try: entry = index_entry_from_path(p, object_store=object_store) except (FileNotFoundError, IsADirectoryError): entry = None yield path, entry def iter_fresh_blobs(index, root_path): """Iterate over versions of blobs on disk referenced by index. Don't use this function; it removes missing entries from index. Args: index: Index file root_path: Root path to access from include_deleted: Include deleted entries with sha and mode set to None Returns: Iterator over path, sha, mode """ import warnings warnings.warn(PendingDeprecationWarning, "Use iter_fresh_objects instead.") for entry in iter_fresh_objects(index, root_path, include_deleted=True): if entry[1] is None: del index[entry[0]] else: yield entry def iter_fresh_objects(paths, root_path, include_deleted=False, object_store=None): """Iterate over versions of objecs on disk referenced by index. Args: root_path: Root path to access from include_deleted: Include deleted entries with sha and mode set to None object_store: Optional object store to report new items to Returns: Iterator over path, sha, mode """ for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store): if entry is None: if include_deleted: yield path, None, None else: entry = IndexEntry(*entry) yield path, entry.sha, cleanup_mode(entry.mode) def refresh_index(index, root_path): """Refresh the contents of an index. This is the equivalent to running 'git commit -a'. Args: index: Index to update root_path: Root filesystem path """ for path, entry in iter_fresh_entries(index, root_path): index[path] = path diff --git a/dulwich/line_ending.py b/dulwich/line_ending.py index 6f149a20..690beada 100644 --- a/dulwich/line_ending.py +++ b/dulwich/line_ending.py @@ -1,293 +1,288 @@ # line_ending.py -- Line ending conversion functions # Copyright (C) 2018-2018 Boris Feld # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. 
You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """ All line-ending related functions, from conversions to config processing Line-ending normalization is a complex beast. Here is some notes and details about how it seems to work. The normalization is a two-fold process that happens at two moments: - When reading a file from the index and to the working directory. For example when doing a `git clone` or `git checkout` call. We call this process the read filter in this module. - When writing a file to the index from the working directory. For example when doing a `git add` call. We call this process the write filter in this module. -The normalization only happens when the resulting file does not exists yet. -For the write filter, they are files that are shown as added in status. +Note that when checking status (getting unstaged changes), whether or not +normalization is done on write depends on whether or not the file in the +working dir has also been normalized on read: + +- For autocrlf=true all files are always normalized on both read and write. +- For autocrlf=input files are only normalized once - whenever a new file is + added to the index. Since files which already exist in the index are + unmodified on read, they are also left unmodified upon subsequent writes. One thing to know is that Git does line-ending normalization only on text files. How does Git know that a file is text? We can either mark a file as a text file, a binary file or ask Git to automatically decides. Git has an heuristic to detect if a file is a text file or a binary file. It seems based on the percentage of non-printable characters in files. The code for this heuristic is here: https://git.kernel.org/pub/scm/git/git.git/tree/convert.c#n46 Dulwich have an implementation with a slightly different heuristic, the `is_binary` function in `dulwich.patch`. The binary detection heuristic implementation is close to the one in JGit: https://github.com/eclipse/jgit/blob/f6873ffe522bbc3536969a3a3546bf9a819b92bf/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java#L300 There is multiple variables that impact the normalization. First, a repository can contains a `.gitattributes` file (or more than one...) that can further customize the operation on some file patterns, for example: *.txt text Force all `.txt` files to be treated as text files and to have their lines endings normalized. *.jpg -text Force all `.jpg` files to be treated as binary files and to not have their lines endings converted. *.vcproj text eol=crlf Force all `.vcproj` files to be treated as text files and to have their lines endings converted into `CRLF` in working directory no matter the native EOL of the platform. *.sh text eol=lf Force all `.sh` files to be treated as text files and to have their lines endings converted into `LF` in working directory no matter the native EOL of the platform. If the `eol` attribute is not defined, Git uses the `core.eol` configuration value described later. 
* text=auto Force all files to be scanned by the text file heuristic detection and to have their line endings normalized in case they are detected as text files. Git also has an obsolete attribute named `crlf` that can be translated to the corresponding text attribute value. Then there are some configuration options (that can be defined at the repository or user level): - core.autocrlf - core.eol `core.autocrlf` is taken into account for all files that don't have a `text` attribute defined in `.gitattributes`; it takes three possible values: - `true`: This forces all files in the working directory to have CRLF line-endings and converts line-endings to LF when writing to the index. When autocrlf is set to true, the eol value is ignored. - `input`: Quite similar to the `true` value but it only forces the write filter, i.e. line-endings of new files added to the index will be converted to LF. - `false` (default): No normalization is done. `core.eol` is the top-level configuration to define the line-ending to use when applying the read filter. It takes three possible values: - `lf`: When normalization is done, force line-endings to be `LF` in the working directory. - `crlf`: When normalization is done, force line-endings to be `CRLF` in the working directory. - `native` (default): When normalization is done, force line-endings to be the platform's native line ending. One thing to remember is that when line-ending normalization is done on a file, Git always normalizes line-endings to `LF` when writing to the index. There are sources that seem to indicate that Git won't do line-ending normalization when a file contains mixed line-endings. I think this logic might be in the text / binary detection heuristic, but I couldn't find it yet.
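To make the autocrlf behaviour described above concrete, here is a minimal sketch (an editorial illustration, not part of the patch) that exercises the filter helpers defined further down in this module; it assumes only the public functions of dulwich.line_ending that appear in this diff:

    from dulwich.line_ending import (
        convert_crlf_to_lf,
        convert_lf_to_crlf,
        get_checkin_filter_autocrlf,
        get_checkout_filter_autocrlf,
    )

    # autocrlf=true: checkin (write) normalizes CRLF -> LF,
    # checkout (read) expands LF -> CRLF.
    assert get_checkin_filter_autocrlf(b"true") is convert_crlf_to_lf
    assert get_checkout_filter_autocrlf(b"true") is convert_lf_to_crlf
    assert convert_crlf_to_lf(b"a\r\nb\r\n") == b"a\nb\n"
    assert convert_lf_to_crlf(b"a\nb\n") == b"a\r\nb\r\n"

    # autocrlf=input: only the checkin (write) filter is active.
    assert get_checkin_filter_autocrlf(b"input") is convert_crlf_to_lf
    assert get_checkout_filter_autocrlf(b"input") is None

    # autocrlf=false (the default): no conversion in either direction.
    assert get_checkin_filter_autocrlf(b"false") is None
    assert get_checkout_filter_autocrlf(b"false") is None

Note that for autocrlf=input only the write side converts, which is why the revised checkin_normalize below also checks whether a read filter was active before normalizing files that already exist in the index.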
Sources: - https://git-scm.com/docs/git-config#git-config-coreeol - https://git-scm.com/docs/git-config#git-config-coreautocrlf - https://git-scm.com/docs/gitattributes#_checking_out_and_checking_in - https://adaptivepatchwork.com/2012/03/01/mind-the-end-of-your-line/ """ from dulwich.objects import Blob from dulwich.patch import is_binary CRLF = b"\r\n" LF = b"\n" def convert_crlf_to_lf(text_hunk): """Convert CRLF in text hunk into LF Args: text_hunk: A bytes string representing a text hunk Returns: The text hunk with the same type, with CRLF replaced into LF """ return text_hunk.replace(CRLF, LF) def convert_lf_to_crlf(text_hunk): """Convert LF in text hunk into CRLF Args: text_hunk: A bytes string representing a text hunk Returns: The text hunk with the same type, with LF replaced into CRLF """ # TODO find a more efficient way of doing it intermediary = text_hunk.replace(CRLF, LF) return intermediary.replace(LF, CRLF) def get_checkout_filter(core_eol, core_autocrlf, git_attributes): """Returns the correct checkout filter based on the passed arguments""" # TODO this function should process the git_attributes for the path and if # the text attribute is not defined, fallback on the # get_checkout_filter_autocrlf function with the autocrlf value return get_checkout_filter_autocrlf(core_autocrlf) def get_checkin_filter(core_eol, core_autocrlf, git_attributes): """Returns the correct checkin filter based on the passed arguments""" # TODO this function should process the git_attributes for the path and if # the text attribute is not defined, fallback on the # get_checkin_filter_autocrlf function with the autocrlf value return get_checkin_filter_autocrlf(core_autocrlf) def get_checkout_filter_autocrlf(core_autocrlf): """Returns the correct checkout filter base on autocrlf value Args: core_autocrlf: The bytes configuration value of core.autocrlf. Valid values are: b'true', b'false' or b'input'. Returns: Either None if no filter has to be applied or a function accepting a single argument, a binary text hunk """ if core_autocrlf == b"true": return convert_lf_to_crlf return None def get_checkin_filter_autocrlf(core_autocrlf): """Returns the correct checkin filter base on autocrlf value Args: core_autocrlf: The bytes configuration value of core.autocrlf. Valid values are: b'true', b'false' or b'input'. 
Returns: Either None if no filter has to be applied or a function accepting a single argument, a binary text hunk """ if core_autocrlf == b"true" or core_autocrlf == b"input": return convert_crlf_to_lf # Checking filter should never be `convert_lf_to_crlf` return None class BlobNormalizer(object): """An object to store computation result of which filter to apply based on configuration, gitattributes, path and operation (checkin or checkout) """ def __init__(self, config_stack, gitattributes): self.config_stack = config_stack self.gitattributes = gitattributes # Compute which filters we needs based on parameters try: core_eol = config_stack.get("core", "eol") except KeyError: core_eol = "native" try: core_autocrlf = config_stack.get("core", "autocrlf").lower() except KeyError: core_autocrlf = False self.fallback_read_filter = get_checkout_filter( core_eol, core_autocrlf, self.gitattributes ) self.fallback_write_filter = get_checkin_filter( core_eol, core_autocrlf, self.gitattributes ) - def checkin_normalize(self, blob, tree_path, new_file=True): - """ Normalize a blob during a checkin operation - - new_file is set to True by default for backward-compatibility - """ - if not new_file: - # Line-ending normalization only happens for new files, aka files - # not already commited - return blob - - if self.fallback_write_filter is not None: + def checkin_normalize(self, blob, tree_path, new_file=False): + """Normalize a blob during a checkin operation""" + # Existing files should only be normalized on checkin if it was + # previously normalized on checkout + if ( + self.fallback_write_filter is not None + and (self.fallback_read_filter is not None or new_file) + ): return normalize_blob( blob, self.fallback_write_filter, binary_detection=True ) return blob - def checkout_normalize(self, blob, tree_path, new_file=True): - """ Normalize a blob during a checkout operation - - new_file is set to True by default for backward-compatibility - """ - if not new_file: - # Line-ending normalization only happens for new files, aka files - # not already commited - return blob - + def checkout_normalize(self, blob, tree_path): + """Normalize a blob during a checkout operation""" if self.fallback_read_filter is not None: return normalize_blob( blob, self.fallback_read_filter, binary_detection=True ) return blob def normalize_blob(blob, conversion, binary_detection): """Takes a blob as input returns either the original blob if binary_detection is True and the blob content looks like binary, else return a new blob with converted data """ # Read the original blob data = blob.data # If we need to detect if a file is binary and the file is detected as # binary, do not apply the conversion function and return the original # chunked text if binary_detection is True: if is_binary(data): return blob # Now apply the conversion converted_data = conversion(data) new_blob = Blob() new_blob.data = converted_data return new_blob diff --git a/dulwich/porcelain.py b/dulwich/porcelain.py index 0209c88c..ea1722ed 100644 --- a/dulwich/porcelain.py +++ b/dulwich/porcelain.py @@ -1,1924 +1,1924 @@ # porcelain.py -- Porcelain-like layer on top of Dulwich # Copyright (C) 2013 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. 
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Simple wrapper that provides porcelain-like functions on top of Dulwich. Currently implemented: * archive * add * branch{_create,_delete,_list} * check-ignore * checkout * clone * commit * commit-tree * daemon * describe * diff-tree * fetch * init * ls-files * ls-remote * ls-tree * pull * push * rm * remote{_add} * receive-pack * reset * rev-list * tag{_create,_delete,_list} * upload-pack * update-server-info * status * symbolic-ref These functions are meant to behave similarly to the git subcommands. Differences in behaviour are considered bugs. Functions should generally accept both unicode strings and bytestrings """ from collections import namedtuple from contextlib import ( closing, contextmanager, ) from io import BytesIO, RawIOBase import datetime import os from pathlib import Path import posixpath import shutil import stat import sys import time from typing import ( Dict, Optional, Tuple, Union, ) from dulwich.archive import ( tar_stream, ) from dulwich.client import ( get_transport_and_path, ) from dulwich.config import ( StackedConfig, ) from dulwich.diff_tree import ( CHANGE_ADD, CHANGE_DELETE, CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY, RENAME_CHANGE_TYPES, ) from dulwich.errors import ( SendPackError, ) from dulwich.graph import ( can_fast_forward, ) from dulwich.ignore import IgnoreFilterManager from dulwich.index import ( blob_from_path_and_stat, get_unstaged_changes, ) from dulwich.object_store import ( tree_lookup_path, ) from dulwich.objects import ( Commit, Tag, format_timezone, parse_timezone, pretty_format_tree_entry, ) from dulwich.objectspec import ( parse_commit, parse_object, parse_ref, parse_reftuples, parse_tree, ) from dulwich.pack import ( write_pack_index, write_pack_objects, ) from dulwich.patch import write_tree_diff from dulwich.protocol import ( Protocol, ZERO_SHA, ) from dulwich.refs import ( ANNOTATED_TAG_SUFFIX, LOCAL_BRANCH_PREFIX, strip_peeled_refs, RefsContainer, ) from dulwich.repo import BaseRepo, Repo from dulwich.server import ( FileSystemBackend, TCPGitServer, ReceivePackHandler, UploadPackHandler, update_server_info as server_update_server_info, ) # Module level tuple definition for status output GitStatus = namedtuple("GitStatus", "staged unstaged untracked") class NoneStream(RawIOBase): """Fallback if stdout or stderr are unavailable, does nothing.""" def read(self, size=-1): return None def readall(self): return None def readinto(self, b): return None def write(self, b): return None default_bytes_out_stream = getattr(sys.stdout, "buffer", None) or NoneStream() default_bytes_err_stream = getattr(sys.stderr, "buffer", None) or NoneStream() DEFAULT_ENCODING = "utf-8" class Error(Exception): """Porcelain-based error. 
""" def __init__(self, msg, inner=None): super(Error, self).__init__(msg) self.inner = inner class RemoteExists(Error): """Raised when the remote already exists.""" def open_repo(path_or_repo): """Open an argument that can be a repository or a path for a repository.""" if isinstance(path_or_repo, BaseRepo): return path_or_repo return Repo(path_or_repo) @contextmanager def _noop_context_manager(obj): """Context manager that has the same api as closing but does nothing.""" yield obj def open_repo_closing(path_or_repo): """Open an argument that can be a repository or a path for a repository. returns a context manager that will close the repo on exit if the argument is a path, else does nothing if the argument is a repo. """ if isinstance(path_or_repo, BaseRepo): return _noop_context_manager(path_or_repo) return closing(Repo(path_or_repo)) def path_to_tree_path(repopath, path, tree_encoding=DEFAULT_ENCODING): """Convert a path to a path usable in an index, e.g. bytes and relative to the repository root. Args: repopath: Repository path, absolute or relative to the cwd path: A path, absolute or relative to the cwd Returns: A path formatted for use in e.g. an index """ # Pathlib resolve before Python 3.6 could raises FileNotFoundError in case # there is no file matching the path so we reuse the old implementation for # Python 3.5 if sys.version_info < (3, 6): if not isinstance(path, bytes): path = os.fsencode(path) if not isinstance(repopath, bytes): repopath = os.fsencode(repopath) treepath = os.path.relpath(path, repopath) if treepath.startswith(b".."): err_msg = "Path %r not in repo path (%r)" % (path, repopath) raise ValueError(err_msg) if os.path.sep != "/": treepath = treepath.replace(os.path.sep.encode("ascii"), b"/") return treepath else: # Resolve might returns a relative path on Windows # https://bugs.python.org/issue38671 if sys.platform == "win32": path = os.path.abspath(path) path = Path(path) resolved_path = path.resolve() # Resolve and abspath seems to behave differently regarding symlinks, # as we are doing abspath on the file path, we need to do the same on # the repo path or they might not match if sys.platform == "win32": repopath = os.path.abspath(repopath) repopath = Path(repopath).resolve() try: relpath = resolved_path.relative_to(repopath) except ValueError: # If path is a symlink that points to a file outside the repo, we # want the relpath for the link itself, not the resolved target if path.is_symlink(): parent = path.parent.resolve() relpath = (parent / path.name).relative_to(repopath) else: raise if sys.platform == "win32": return str(relpath).replace(os.path.sep, "/").encode(tree_encoding) else: return bytes(relpath) class DivergedBranches(Error): """Branches have diverged and fast-forward is not possible.""" def check_diverged(repo, current_sha, new_sha): """Check if updating to a sha can be done with fast forwarding. Args: repo: Repository object current_sha: Current head sha new_sha: New head sha """ try: can = can_fast_forward(repo, current_sha, new_sha) except KeyError: can = False if not can: raise DivergedBranches(current_sha, new_sha) def archive( repo, committish=None, outstream=default_bytes_out_stream, errstream=default_bytes_err_stream, ): """Create an archive. Args: repo: Path of repository for which to generate an archive. 
committish: Commit SHA1 or ref to use outstream: Output stream (defaults to stdout) errstream: Error stream (defaults to stderr) """ if committish is None: committish = "HEAD" with open_repo_closing(repo) as repo_obj: c = parse_commit(repo_obj, committish) for chunk in tar_stream( repo_obj.object_store, repo_obj.object_store[c.tree], c.commit_time ): outstream.write(chunk) def update_server_info(repo="."): """Update server info files for a repository. Args: repo: path to the repository """ with open_repo_closing(repo) as r: server_update_server_info(r) def symbolic_ref(repo, ref_name, force=False): """Set git symbolic ref into HEAD. Args: repo: path to the repository ref_name: short name of the new ref force: force settings without checking if it exists in refs/heads """ with open_repo_closing(repo) as repo_obj: ref_path = _make_branch_ref(ref_name) if not force and ref_path not in repo_obj.refs.keys(): raise Error("fatal: ref `%s` is not a ref" % ref_name) repo_obj.refs.set_symbolic_ref(b"HEAD", ref_path) def commit( repo=".", message=None, author=None, committer=None, encoding=None, no_verify=False, ): """Create a new commit. Args: repo: Path to repository message: Optional commit message author: Optional author name and email committer: Optional committer name and email no_verify: Skip pre-commit and commit-msg hooks Returns: SHA1 of the new commit """ # FIXME: Support --all argument # FIXME: Support --signoff argument if getattr(message, "encode", None): message = message.encode(encoding or DEFAULT_ENCODING) if getattr(author, "encode", None): author = author.encode(encoding or DEFAULT_ENCODING) if getattr(committer, "encode", None): committer = committer.encode(encoding or DEFAULT_ENCODING) with open_repo_closing(repo) as r: return r.do_commit( message=message, author=author, committer=committer, encoding=encoding, no_verify=no_verify, ) def commit_tree(repo, tree, message=None, author=None, committer=None): """Create a new commit object. Args: repo: Path to repository tree: An existing tree object author: Optional author name and email committer: Optional committer name and email """ with open_repo_closing(repo) as r: return r.do_commit( message=message, tree=tree, committer=committer, author=author ) def init(path=".", bare=False): """Create a new git repository. Args: path: Path to repository. bare: Whether to create a bare repository. Returns: A Repo instance """ if not os.path.exists(path): os.mkdir(path) if bare: return Repo.init_bare(path) else: return Repo.init(path) def clone( source, target=None, bare=False, checkout=None, errstream=default_bytes_err_stream, outstream=None, origin=b"origin", depth=None, **kwargs ): """Clone a local or remote git repository. 
Args: source: Path or URL for source repository target: Path to target repository (optional) bare: Whether or not to create a bare repository checkout: Whether or not to check-out HEAD after cloning errstream: Optional stream to write progress to outstream: Optional stream to write progress to (deprecated) origin: Name of remote from the repository used to clone depth: Depth to fetch at Returns: The new repository """ # TODO(jelmer): This code overlaps quite a bit with Repo.clone if outstream is not None: import warnings warnings.warn( "outstream= has been deprecated in favour of errstream=.", DeprecationWarning, stacklevel=3, ) errstream = outstream if checkout is None: checkout = not bare if checkout and bare: raise Error("checkout and bare are incompatible") if target is None: target = source.split("/")[-1] if not os.path.exists(target): os.mkdir(target) if bare: r = Repo.init_bare(target) else: r = Repo.init(target) reflog_message = b"clone: from " + source.encode("utf-8") try: target_config = r.get_config() if not isinstance(source, bytes): source = source.encode(DEFAULT_ENCODING) target_config.set((b"remote", origin), b"url", source) target_config.set( (b"remote", origin), b"fetch", b"+refs/heads/*:refs/remotes/" + origin + b"/*", ) target_config.write_to_path() fetch_result = fetch( r, origin, errstream=errstream, message=reflog_message, depth=depth, **kwargs ) for key, target in fetch_result.symrefs.items(): r.refs.set_symbolic_ref(key, target) try: head = r[fetch_result.refs[b"HEAD"]] except KeyError: head = None else: r[b"HEAD"] = head.id if checkout and not bare and head is not None: errstream.write(b"Checking out " + head.id + b"\n") r.reset_index(head.tree) except BaseException: shutil.rmtree(target) r.close() raise return r def add(repo=".", paths=None): """Add files to the staging area. Args: repo: Repository for the files paths: Paths to add. No value passed stages all modified files. Returns: Tuple with set of added files and ignored files If the repository contains ignored directories, the returned set will contain the path to an ignored directory (with trailing slash). Individual files within ignored directories will not be returned. """ ignored = set() with open_repo_closing(repo) as r: repo_path = Path(r.path).resolve() ignore_manager = IgnoreFilterManager.from_repo(r) if not paths: paths = list( get_untracked_paths( str(Path(os.getcwd()).resolve()), str(repo_path), r.open_index(), ) ) relpaths = [] if not isinstance(paths, list): paths = [paths] for p in paths: path = Path(p) relpath = str(path.resolve().relative_to(repo_path)) # FIXME: Support patterns if path.is_dir(): relpath = os.path.join(relpath, "") if ignore_manager.is_ignored(relpath): ignored.add(relpath) continue relpaths.append(relpath) r.stage(relpaths) return (relpaths, ignored) def _is_subdir(subdir, parentdir): """Check whether subdir is parentdir or a subdir of parentdir If parentdir or subdir is a relative path, it will be disamgibuated relative to the pwd. """ parentdir_abs = os.path.realpath(parentdir) + os.path.sep subdir_abs = os.path.realpath(subdir) + os.path.sep return subdir_abs.startswith(parentdir_abs) # TODO: option to remove ignored files also, in line with `git clean -fdx` def clean(repo=".", target_dir=None): """Remove any untracked files from the target directory recursively Equivalent to running `git clean -fd` in target_dir. 
Args: repo: Repository where the files may be tracked target_dir: Directory to clean - current directory if None """ if target_dir is None: target_dir = os.getcwd() with open_repo_closing(repo) as r: if not _is_subdir(target_dir, r.path): raise Error("target_dir must be in the repo's working dir") config = r.get_config_stack() require_force = config.get_boolean( # noqa: F841 (b"clean",), b"requireForce", True ) # TODO(jelmer): if require_force is set, then make sure that -f, -i or # -n is specified. index = r.open_index() ignore_manager = IgnoreFilterManager.from_repo(r) paths_in_wd = _walk_working_dir_paths(target_dir, r.path) # Reverse file visit order, so that files and subdirectories are # removed before containing directory for ap, is_dir in reversed(list(paths_in_wd)): if is_dir: # All subdirectories and files have been removed if untracked, # so dir contains no tracked files iff it is empty. is_empty = len(os.listdir(ap)) == 0 if is_empty: os.rmdir(ap) else: ip = path_to_tree_path(r.path, ap) is_tracked = ip in index rp = os.path.relpath(ap, r.path) is_ignored = ignore_manager.is_ignored(rp) if not is_tracked and not is_ignored: os.remove(ap) def remove(repo=".", paths=None, cached=False): """Remove files from the staging area. Args: repo: Repository for the files paths: Paths to remove """ with open_repo_closing(repo) as r: index = r.open_index() for p in paths: full_path = os.fsencode(os.path.abspath(p)) tree_path = path_to_tree_path(r.path, p) try: index_sha = index[tree_path].sha except KeyError: raise Error("%s did not match any files" % p) if not cached: try: st = os.lstat(full_path) except OSError: pass else: try: blob = blob_from_path_and_stat(full_path, st) except IOError: pass else: try: committed_sha = tree_lookup_path( r.__getitem__, r[r.head()].tree, tree_path )[1] except KeyError: committed_sha = None if blob.id != index_sha and index_sha != committed_sha: raise Error( "file has staged content differing " "from both the file and head: %s" % p ) if index_sha != committed_sha: raise Error("file has staged changes: %s" % p) os.remove(full_path) del index[tree_path] index.write() rm = remove def commit_decode(commit, contents, default_encoding=DEFAULT_ENCODING): if commit.encoding: encoding = commit.encoding.decode("ascii") else: encoding = default_encoding return contents.decode(encoding, "replace") def commit_encode(commit, contents, default_encoding=DEFAULT_ENCODING): if commit.encoding: encoding = commit.encoding.decode("ascii") else: encoding = default_encoding return contents.encode(encoding) def print_commit(commit, decode, outstream=sys.stdout): """Write a human-readable commit log entry. 
Args: commit: A `Commit` object outstream: A stream file to write to """ outstream.write("-" * 50 + "\n") outstream.write("commit: " + commit.id.decode("ascii") + "\n") if len(commit.parents) > 1: outstream.write( "merge: " + "...".join([c.decode("ascii") for c in commit.parents[1:]]) + "\n" ) outstream.write("Author: " + decode(commit.author) + "\n") if commit.author != commit.committer: outstream.write("Committer: " + decode(commit.committer) + "\n") time_tuple = time.gmtime(commit.author_time + commit.author_timezone) time_str = time.strftime("%a %b %d %Y %H:%M:%S", time_tuple) timezone_str = format_timezone(commit.author_timezone).decode("ascii") outstream.write("Date: " + time_str + " " + timezone_str + "\n") outstream.write("\n") outstream.write(decode(commit.message) + "\n") outstream.write("\n") def print_tag(tag, decode, outstream=sys.stdout): """Write a human-readable tag. Args: tag: A `Tag` object decode: Function for decoding bytes to unicode string outstream: A stream to write to """ outstream.write("Tagger: " + decode(tag.tagger) + "\n") time_tuple = time.gmtime(tag.tag_time + tag.tag_timezone) time_str = time.strftime("%a %b %d %Y %H:%M:%S", time_tuple) timezone_str = format_timezone(tag.tag_timezone).decode("ascii") outstream.write("Date: " + time_str + " " + timezone_str + "\n") outstream.write("\n") outstream.write(decode(tag.message)) outstream.write("\n") def show_blob(repo, blob, decode, outstream=sys.stdout): """Write a blob to a stream. Args: repo: A `Repo` object blob: A `Blob` object decode: Function for decoding bytes to unicode string outstream: A stream file to write to """ outstream.write(decode(blob.data)) def show_commit(repo, commit, decode, outstream=sys.stdout): """Show a commit to a stream. Args: repo: A `Repo` object commit: A `Commit` object decode: Function for decoding bytes to unicode string outstream: Stream to write to """ print_commit(commit, decode=decode, outstream=outstream) if commit.parents: parent_commit = repo[commit.parents[0]] base_tree = parent_commit.tree else: base_tree = None diffstream = BytesIO() write_tree_diff(diffstream, repo.object_store, base_tree, commit.tree) diffstream.seek(0) outstream.write(commit_decode(commit, diffstream.getvalue())) def show_tree(repo, tree, decode, outstream=sys.stdout): """Print a tree to a stream. Args: repo: A `Repo` object tree: A `Tree` object decode: Function for decoding bytes to unicode string outstream: Stream to write to """ for n in tree: outstream.write(decode(n) + "\n") def show_tag(repo, tag, decode, outstream=sys.stdout): """Print a tag to a stream. 
Args: repo: A `Repo` object tag: A `Tag` object decode: Function for decoding bytes to unicode string outstream: Stream to write to """ print_tag(tag, decode, outstream) show_object(repo, repo[tag.object[1]], decode, outstream) def show_object(repo, obj, decode, outstream): return { b"tree": show_tree, b"blob": show_blob, b"commit": show_commit, b"tag": show_tag, }[obj.type_name](repo, obj, decode, outstream) def print_name_status(changes): """Print a simple status summary, listing changed files.""" for change in changes: if not change: continue if isinstance(change, list): change = change[0] if change.type == CHANGE_ADD: path1 = change.new.path path2 = "" kind = "A" elif change.type == CHANGE_DELETE: path1 = change.old.path path2 = "" kind = "D" elif change.type == CHANGE_MODIFY: path1 = change.new.path path2 = "" kind = "M" elif change.type in RENAME_CHANGE_TYPES: path1 = change.old.path path2 = change.new.path if change.type == CHANGE_RENAME: kind = "R" elif change.type == CHANGE_COPY: kind = "C" yield "%-8s%-20s%-20s" % (kind, path1, path2) def log( repo=".", paths=None, outstream=sys.stdout, max_entries=None, reverse=False, name_status=False, ): """Write commit logs. Args: repo: Path to repository paths: Optional set of specific paths to print entries for outstream: Stream to write log output to reverse: Reverse order in which entries are printed name_status: Print name status max_entries: Optional maximum number of entries to display """ with open_repo_closing(repo) as r: walker = r.get_walker(max_entries=max_entries, paths=paths, reverse=reverse) for entry in walker: def decode(x): return commit_decode(entry.commit, x) print_commit(entry.commit, decode, outstream) if name_status: outstream.writelines( [line + "\n" for line in print_name_status(entry.changes())] ) # TODO(jelmer): better default for encoding? def show( repo=".", objects=None, outstream=sys.stdout, default_encoding=DEFAULT_ENCODING, ): """Print the changes in a commit. Args: repo: Path to repository objects: Objects to show (defaults to [HEAD]) outstream: Stream to write to default_encoding: Default encoding to use if none is set in the commit """ if objects is None: objects = ["HEAD"] if not isinstance(objects, list): objects = [objects] with open_repo_closing(repo) as r: for objectish in objects: o = parse_object(r, objectish) if isinstance(o, Commit): def decode(x): return commit_decode(o, x, default_encoding) else: def decode(x): return x.decode(default_encoding) show_object(r, o, decode, outstream) def diff_tree(repo, old_tree, new_tree, outstream=sys.stdout): """Compares the content and mode of blobs found via two tree objects. Args: repo: Path to repository old_tree: Id of old tree new_tree: Id of new tree outstream: Stream to write to """ with open_repo_closing(repo) as r: write_tree_diff(outstream, r.object_store, old_tree, new_tree) def rev_list(repo, commits, outstream=sys.stdout): """Lists commit objects in reverse chronological order. 
Args: repo: Path to repository commits: Commits over which to iterate outstream: Stream to write to """ with open_repo_closing(repo) as r: for entry in r.get_walker(include=[r[c].id for c in commits]): outstream.write(entry.commit.id + b"\n") def tag(*args, **kwargs): import warnings warnings.warn( "tag has been deprecated in favour of tag_create.", DeprecationWarning ) return tag_create(*args, **kwargs) def tag_create( repo, tag, author=None, message=None, annotated=False, objectish="HEAD", tag_time=None, tag_timezone=None, sign=False, ): """Creates a tag in git via dulwich calls: Args: repo: Path to repository tag: tag string author: tag author (optional, if annotated is set) message: tag message (optional) annotated: whether to create an annotated tag objectish: object the tag should point at, defaults to HEAD tag_time: Optional time for annotated tag tag_timezone: Optional timezone for annotated tag sign: GPG Sign the tag (bool, defaults to False, pass True to use default GPG key, pass a str containing Key ID to use a specific GPG key) """ with open_repo_closing(repo) as r: object = parse_object(r, objectish) if annotated: # Create the tag object tag_obj = Tag() if author is None: # TODO(jelmer): Don't use repo private method. author = r._get_user_identity(r.get_config_stack()) tag_obj.tagger = author tag_obj.message = message + "\n".encode() tag_obj.name = tag tag_obj.object = (type(object), object.id) if tag_time is None: tag_time = int(time.time()) tag_obj.tag_time = tag_time if tag_timezone is None: # TODO(jelmer) Use current user timezone rather than UTC tag_timezone = 0 elif isinstance(tag_timezone, str): tag_timezone = parse_timezone(tag_timezone) tag_obj.tag_timezone = tag_timezone if sign: tag_obj.sign(sign if isinstance(sign, str) else None) r.object_store.add_object(tag_obj) tag_id = tag_obj.id else: tag_id = object.id r.refs[_make_tag_ref(tag)] = tag_id def list_tags(*args, **kwargs): import warnings warnings.warn( "list_tags has been deprecated in favour of tag_list.", DeprecationWarning, ) return tag_list(*args, **kwargs) def tag_list(repo, outstream=sys.stdout): """List all tags. Args: repo: Path to repository outstream: Stream to write tags to """ with open_repo_closing(repo) as r: tags = sorted(r.refs.as_dict(b"refs/tags")) return tags def tag_delete(repo, name): """Remove a tag. Args: repo: Path to repository name: Name of tag to remove """ with open_repo_closing(repo) as r: if isinstance(name, bytes): names = [name] elif isinstance(name, list): names = name else: raise Error("Unexpected tag name type %r" % name) for name in names: del r.refs[_make_tag_ref(name)] def reset(repo, mode, treeish="HEAD"): """Reset current HEAD to the specified state. 
Args: repo: Path to repository mode: Mode ("hard", "soft", "mixed") treeish: Treeish to reset to """ if mode != "hard": raise Error("hard is the only mode currently supported") with open_repo_closing(repo) as r: tree = parse_tree(r, treeish) r.reset_index(tree.id) def get_remote_repo( repo: Repo, remote_location: Optional[Union[str, bytes]] = None ) -> Tuple[Optional[str], str]: config = repo.get_config() if remote_location is None: remote_location = get_branch_remote(repo) if isinstance(remote_location, str): encoded_location = remote_location.encode() else: encoded_location = remote_location section = (b"remote", encoded_location) remote_name = None # type: Optional[str] if config.has_section(section): remote_name = encoded_location.decode() url = config.get(section, "url") encoded_location = url else: remote_name = None return (remote_name, encoded_location.decode()) def push( repo, remote_location=None, refspecs=None, outstream=default_bytes_out_stream, errstream=default_bytes_err_stream, force=False, **kwargs ): """Remote push with dulwich via dulwich.client Args: repo: Path to repository remote_location: Location of the remote refspecs: Refs to push to remote outstream: A stream file to write output errstream: A stream file to write errors force: Force overwriting refs """ # Open the repo with open_repo_closing(repo) as r: if refspecs is None: refspecs = [active_branch(r)] (remote_name, remote_location) = get_remote_repo(r, remote_location) # Get the client and path client, path = get_transport_and_path( remote_location, config=r.get_config_stack(), **kwargs ) selected_refs = [] remote_changed_refs = {} def update_refs(refs): selected_refs.extend(parse_reftuples(r.refs, refs, refspecs, force=force)) new_refs = {} # TODO: Handle selected_refs == {None: None} for (lh, rh, force_ref) in selected_refs: if lh is None: new_refs[rh] = ZERO_SHA remote_changed_refs[rh] = None else: try: localsha = r.refs[lh] except KeyError: raise Error("No valid ref %s in local repository" % lh) if not force_ref and rh in refs: check_diverged(r, refs[rh], localsha) new_refs[rh] = localsha remote_changed_refs[rh] = localsha return new_refs err_encoding = getattr(errstream, "encoding", None) or DEFAULT_ENCODING remote_location = client.get_url(path) try: result = client.send_pack( path, update_refs, generate_pack_data=r.generate_pack_data, progress=errstream.write, ) except SendPackError as e: raise Error( "Push to " + remote_location + " failed -> " + e.args[0].decode(), inner=e, ) else: errstream.write( b"Push to " + remote_location.encode(err_encoding) + b" successful.\n" ) for ref, error in (result.ref_status or {}).items(): if error is not None: errstream.write( b"Push of ref %s failed: %s\n" % (ref, error.encode(err_encoding)) ) else: errstream.write(b"Ref %s updated\n" % ref) if remote_name is not None: _import_remote_refs(r.refs, remote_name, remote_changed_refs) def pull( repo, remote_location=None, refspecs=None, outstream=default_bytes_out_stream, errstream=default_bytes_err_stream, fast_forward=True, force=False, **kwargs ): """Pull from remote via dulwich.client Args: repo: Path to repository remote_location: Location of the remote refspec: refspecs to fetch outstream: A stream file to write to output errstream: A stream file to write to errors """ # Open the repo with open_repo_closing(repo) as r: (remote_name, remote_location) = get_remote_repo(r, remote_location) if refspecs is None: refspecs = [b"HEAD"] selected_refs = [] def determine_wants(remote_refs, **kwargs): selected_refs.extend( 
parse_reftuples(remote_refs, r.refs, refspecs, force=force) ) return [ remote_refs[lh] for (lh, rh, force_ref) in selected_refs if remote_refs[lh] not in r.object_store ] client, path = get_transport_and_path( remote_location, config=r.get_config_stack(), **kwargs ) fetch_result = client.fetch( path, r, progress=errstream.write, determine_wants=determine_wants ) for (lh, rh, force_ref) in selected_refs: try: check_diverged(r, r.refs[rh], fetch_result.refs[lh]) except DivergedBranches: if fast_forward: raise else: raise NotImplementedError("merge is not yet supported") r.refs[rh] = fetch_result.refs[lh] if selected_refs: r[b"HEAD"] = fetch_result.refs[selected_refs[0][1]] # Perform 'git checkout .' - syncs staged changes tree = r[b"HEAD"].tree r.reset_index(tree=tree) if remote_name is not None: _import_remote_refs(r.refs, remote_name, fetch_result.refs) def status(repo=".", ignored=False): """Returns staged, unstaged, and untracked changes relative to the HEAD. Args: repo: Path to repository or repository object ignored: Whether to include ignored files in `untracked` Returns: GitStatus tuple, staged - dict with lists of staged paths (diff index/HEAD) unstaged - list of unstaged paths (diff index/working-tree) untracked - list of untracked, un-ignored & non-.git paths """ with open_repo_closing(repo) as r: # 1. Get status of staged tracked_changes = get_tree_changes(r) # 2. Get status of unstaged index = r.open_index() normalizer = r.get_blob_normalizer() filter_callback = normalizer.checkin_normalize - unstaged_changes = list(get_unstaged_changes(index, r, filter_callback)) + unstaged_changes = list(get_unstaged_changes(index, r.path, filter_callback)) untracked_paths = get_untracked_paths( r.path, r.path, index, exclude_ignored=not ignored ) untracked_changes = list(untracked_paths) return GitStatus(tracked_changes, unstaged_changes, untracked_changes) def _walk_working_dir_paths(frompath, basepath, prune_dirnames=None): """Get path, is_dir for files in working dir from frompath Args: frompath: Path to begin walk basepath: Path to compare to prune_dirnames: Optional callback to prune dirnames during os.walk dirnames will be set to result of prune_dirnames(dirpath, dirnames) """ for dirpath, dirnames, filenames in os.walk(frompath): # Skip .git and below. if ".git" in dirnames: dirnames.remove(".git") if dirpath != basepath: continue if ".git" in filenames: filenames.remove(".git") if dirpath != basepath: continue if dirpath != frompath: yield dirpath, True for filename in filenames: filepath = os.path.join(dirpath, filename) yield filepath, False if prune_dirnames: dirnames[:] = prune_dirnames(dirpath, dirnames) def get_untracked_paths(frompath, basepath, index, exclude_ignored=False): """Get untracked paths. Args: frompath: Path to walk basepath: Path to compare to index: Index to check against exclude_ignored: Whether to exclude ignored paths Note: ignored directories will never be walked for performance reasons. 
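# Example (illustrative only, not part of this patch): pushing, pulling and
# inspecting working-tree state with the helpers above, assuming a remote named
# "origin" is already configured; the repository path and branch are hypothetical.
from dulwich import porcelain

porcelain.push("./myrepo", "origin", [b"master"])
porcelain.pull("./myrepo", "origin", [b"master"])
staged, unstaged, untracked = porcelain.status("./myrepo")
print(staged["modify"], unstaged, untracked)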
If exclude_ignored is False, only the path to an ignored directory will be yielded, no files inside the directory will be returned """ with open_repo_closing(basepath) as r: ignore_manager = IgnoreFilterManager.from_repo(r) ignored_dirs = [] def prune_dirnames(dirpath, dirnames): for i in range(len(dirnames) - 1, -1, -1): path = os.path.join(dirpath, dirnames[i]) ip = os.path.join(os.path.relpath(path, basepath), "") if ignore_manager.is_ignored(ip): if not exclude_ignored: ignored_dirs.append( os.path.join(os.path.relpath(path, frompath), "") ) del dirnames[i] return dirnames for ap, is_dir in _walk_working_dir_paths( frompath, basepath, prune_dirnames=prune_dirnames ): if not is_dir: ip = path_to_tree_path(basepath, ap) if ip not in index: if ( not exclude_ignored or not ignore_manager.is_ignored( os.path.relpath(ap, basepath) ) ): yield os.path.relpath(ap, frompath) yield from ignored_dirs def get_tree_changes(repo): """Return add/delete/modify changes to tree by comparing index to HEAD. Args: repo: repo path or object Returns: dict with lists for each type of change """ with open_repo_closing(repo) as r: index = r.open_index() # Compares the Index to the HEAD & determines changes # Iterate through the changes and report add/delete/modify # TODO: call out to dulwich.diff_tree somehow. tracked_changes = { "add": [], "delete": [], "modify": [], } try: tree_id = r[b"HEAD"].tree except KeyError: tree_id = None for change in index.changes_from_tree(r.object_store, tree_id): if not change[0][0]: tracked_changes["add"].append(change[0][1]) elif not change[0][1]: tracked_changes["delete"].append(change[0][0]) elif change[0][0] == change[0][1]: tracked_changes["modify"].append(change[0][0]) else: raise NotImplementedError("git mv ops not yet supported") return tracked_changes def daemon(path=".", address=None, port=None): """Run a daemon serving Git requests over TCP/IP. Args: path: Path to the directory to serve. address: Optional address to listen on (defaults to ::) port: Optional port to listen on (defaults to TCP_GIT_PORT) """ # TODO(jelmer): Support git-daemon-export-ok and --export-all. backend = FileSystemBackend(path) server = TCPGitServer(backend, address, port) server.serve_forever() def web_daemon(path=".", address=None, port=None): """Run a daemon serving Git requests over HTTP. Args: path: Path to the directory to serve address: Optional address to listen on (defaults to ::) port: Optional port to listen on (defaults to 80) """ from dulwich.web import ( make_wsgi_chain, make_server, WSGIRequestHandlerLogger, WSGIServerLogger, ) backend = FileSystemBackend(path) app = make_wsgi_chain(backend) server = make_server( address, port, app, handler_class=WSGIRequestHandlerLogger, server_class=WSGIServerLogger, ) server.serve_forever() def upload_pack(path=".", inf=None, outf=None): """Upload a pack file after negotiating its contents using smart protocol. Args: path: Path to the repository inf: Input stream to communicate with client outf: Output stream to communicate with client """ if outf is None: outf = getattr(sys.stdout, "buffer", sys.stdout) if inf is None: inf = getattr(sys.stdin, "buffer", sys.stdin) path = os.path.expanduser(path) backend = FileSystemBackend(path) def send_fn(data): outf.write(data) outf.flush() proto = Protocol(inf.read, send_fn) handler = UploadPackHandler(backend, [path], proto) # FIXME: Catch exceptions and write a single-line summary to outf. 
handler.handle() return 0 def receive_pack(path=".", inf=None, outf=None): """Receive a pack file after negotiating its contents using smart protocol. Args: path: Path to the repository inf: Input stream to communicate with client outf: Output stream to communicate with client """ if outf is None: outf = getattr(sys.stdout, "buffer", sys.stdout) if inf is None: inf = getattr(sys.stdin, "buffer", sys.stdin) path = os.path.expanduser(path) backend = FileSystemBackend(path) def send_fn(data): outf.write(data) outf.flush() proto = Protocol(inf.read, send_fn) handler = ReceivePackHandler(backend, [path], proto) # FIXME: Catch exceptions and write a single-line summary to outf. handler.handle() return 0 def _make_branch_ref(name): if getattr(name, "encode", None): name = name.encode(DEFAULT_ENCODING) return LOCAL_BRANCH_PREFIX + name def _make_tag_ref(name): if getattr(name, "encode", None): name = name.encode(DEFAULT_ENCODING) return b"refs/tags/" + name def branch_delete(repo, name): """Delete a branch. Args: repo: Path to the repository name: Name of the branch """ with open_repo_closing(repo) as r: if isinstance(name, list): names = name else: names = [name] for name in names: del r.refs[_make_branch_ref(name)] def branch_create(repo, name, objectish=None, force=False): """Create a branch. Args: repo: Path to the repository name: Name of the new branch objectish: Target object to point new branch at (defaults to HEAD) force: Force creation of branch, even if it already exists """ with open_repo_closing(repo) as r: if objectish is None: objectish = "HEAD" object = parse_object(r, objectish) refname = _make_branch_ref(name) ref_message = b"branch: Created from " + objectish.encode("utf-8") if force: r.refs.set_if_equals(refname, None, object.id, message=ref_message) else: if not r.refs.add_if_new(refname, object.id, message=ref_message): raise Error("Branch with name %s already exists." % name) def branch_list(repo): """List all branches. Args: repo: Path to the repository """ with open_repo_closing(repo) as r: return r.refs.keys(base=LOCAL_BRANCH_PREFIX) def active_branch(repo): """Return the active branch in the repository, if any. Args: repo: Repository to open Returns: branch name Raises: KeyError: if the repository does not have a working tree IndexError: if HEAD is floating """ with open_repo_closing(repo) as r: active_ref = r.refs.follow(b"HEAD")[0][1] if not active_ref.startswith(LOCAL_BRANCH_PREFIX): raise ValueError(active_ref) return active_ref[len(LOCAL_BRANCH_PREFIX) :] def get_branch_remote(repo): """Return the active branch's remote name, if any. 
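# Example (illustrative only, not part of this patch): basic branch manipulation
# with the helpers above; the repository path and branch name are hypothetical.
from dulwich import porcelain

porcelain.branch_create("./myrepo", "feature-x")   # new branch at HEAD
print(porcelain.branch_list("./myrepo"))           # branch names under refs/heads/
print(porcelain.active_branch("./myrepo"))         # e.g. b'master'
porcelain.branch_delete("./myrepo", "feature-x")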
    Args:
      repo: Repository to open
    Returns:
      remote name
    Raises:
      KeyError: if the repository does not have a working tree
    """
    with open_repo_closing(repo) as r:
        branch_name = active_branch(r.path)
        config = r.get_config()
        try:
            remote_name = config.get((b"branch", branch_name), b"remote")
        except KeyError:
            remote_name = b"origin"
    return remote_name


def _import_remote_refs(
    refs_container: RefsContainer,
    remote_name: str,
    refs: Dict[str, str],
    message: Optional[bytes] = None,
    prune: bool = False,
    prune_tags: bool = False,
):
    stripped_refs = strip_peeled_refs(refs)
    branches = {
        n[len(LOCAL_BRANCH_PREFIX) :]: v
        for (n, v) in stripped_refs.items()
        if n.startswith(LOCAL_BRANCH_PREFIX)
    }
    refs_container.import_refs(
        b"refs/remotes/" + remote_name.encode(),
        branches,
        message=message,
        prune=prune,
    )
    tags = {
        n[len(b"refs/tags/") :]: v
        for (n, v) in stripped_refs.items()
        if n.startswith(b"refs/tags/") and not n.endswith(ANNOTATED_TAG_SUFFIX)
    }
    refs_container.import_refs(b"refs/tags", tags, message=message, prune=prune_tags)


def fetch(
    repo,
    remote_location=None,
    outstream=sys.stdout,
    errstream=default_bytes_err_stream,
    message=None,
    depth=None,
    prune=False,
    prune_tags=False,
    force=False,
    **kwargs
):
    """Fetch objects from a remote server.

    Args:
      repo: Path to the repository
      remote_location: String identifying a remote server
      outstream: Output stream (defaults to stdout)
      errstream: Error stream (defaults to stderr)
      message: Reflog message (defaults to b"fetch: from ")
      depth: Depth to fetch at
      prune: Prune remote removed refs
      prune_tags: Prune remote removed tags
    Returns:
      Dictionary with refs on the remote
    """
    with open_repo_closing(repo) as r:
        (remote_name, remote_location) = get_remote_repo(r, remote_location)
        if message is None:
            message = b"fetch: from " + remote_location.encode("utf-8")
        client, path = get_transport_and_path(
            remote_location, config=r.get_config_stack(), **kwargs
        )
        fetch_result = client.fetch(path, r, progress=errstream.write, depth=depth)
        if remote_name is not None:
            _import_remote_refs(
                r.refs,
                remote_name,
                fetch_result.refs,
                message,
                prune=prune,
                prune_tags=prune_tags,
            )
    return fetch_result


def ls_remote(remote, config=None, **kwargs):
    """List the refs in a remote.

    Args:
      remote: Remote repository location
      config: Configuration to use
    Returns:
      Dictionary with remote refs
    """
    if config is None:
        config = StackedConfig.default()
    client, host_path = get_transport_and_path(remote, config=config, **kwargs)
    return client.get_refs(host_path)


def repack(repo):
    """Repack loose files in a repository.

    Currently this only packs loose objects.

    Args:
      repo: Path to the repository
    """
    with open_repo_closing(repo) as r:
        r.object_store.pack_loose_objects()


def pack_objects(repo, object_ids, packf, idxf, delta_window_size=None):
    """Pack objects into a file.

    Args:
      repo: Path to the repository
      object_ids: List of object ids to write
      packf: File-like object to write to
      idxf: File-like object to write to (can be None)
    """
    with open_repo_closing(repo) as r:
        entries, data_sum = write_pack_objects(
            packf,
            r.object_store.iter_shas((oid, None) for oid in object_ids),
            delta_window_size=delta_window_size,
        )
    if idxf is not None:
        entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
        write_pack_index(idxf, entries, data_sum)


def ls_tree(
    repo,
    treeish=b"HEAD",
    outstream=sys.stdout,
    recursive=False,
    name_only=False,
):
    """List contents of a tree.
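# Example (illustrative only, not part of this patch): listing the refs
# advertised by a remote and fetching from it; the URL and repository path are
# hypothetical.
from dulwich import porcelain

refs = porcelain.ls_remote("https://example.com/repo.git")
print(sorted(refs))
result = porcelain.fetch("./myrepo", "https://example.com/repo.git")
print(result.refs.get(b"HEAD"))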
    Args:
      repo: Path to the repository
      treeish: Tree id to list
      outstream: Output stream (defaults to stdout)
      recursive: Whether to recursively list files
      name_only: Only print item name
    """

    def list_tree(store, treeid, base):
        for (name, mode, sha) in store[treeid].iteritems():
            if base:
                name = posixpath.join(base, name)
            if name_only:
                outstream.write(name + b"\n")
            else:
                outstream.write(pretty_format_tree_entry(name, mode, sha))
            if stat.S_ISDIR(mode) and recursive:
                list_tree(store, sha, name)

    with open_repo_closing(repo) as r:
        tree = parse_tree(r, treeish)
        list_tree(r.object_store, tree.id, "")


def remote_add(repo, name, url):
    """Add a remote.

    Args:
      repo: Path to the repository
      name: Remote name
      url: Remote URL
    """
    if not isinstance(name, bytes):
        name = name.encode(DEFAULT_ENCODING)
    if not isinstance(url, bytes):
        url = url.encode(DEFAULT_ENCODING)
    with open_repo_closing(repo) as r:
        c = r.get_config()
        section = (b"remote", name)
        if c.has_section(section):
            raise RemoteExists(section)
        c.set(section, b"url", url)
        c.write_to_path()


def check_ignore(repo, paths, no_index=False):
    """Debug gitignore files.

    Args:
      repo: Path to the repository
      paths: List of paths to check
      no_index: Don't check index
    Returns:
      List of ignored files
    """
    with open_repo_closing(repo) as r:
        index = r.open_index()
        ignore_manager = IgnoreFilterManager.from_repo(r)
        for path in paths:
            if not no_index and path_to_tree_path(r.path, path) in index:
                continue
            if os.path.isabs(path):
                path = os.path.relpath(path, r.path)
            if ignore_manager.is_ignored(path):
                yield path


def update_head(repo, target, detached=False, new_branch=None):
    """Update HEAD to point at a new branch/commit.

    Note that this does not actually update the working tree.

    Args:
      repo: Path to the repository
      target: Branch or committish to switch to
      detached: Create a detached head
      new_branch: New branch to create
    """
    with open_repo_closing(repo) as r:
        if new_branch is not None:
            to_set = _make_branch_ref(new_branch)
        else:
            to_set = b"HEAD"
        if detached:
            # TODO(jelmer): Provide some way so that the actual ref gets
            # updated rather than what it points to, so the delete isn't
            # necessary.
            del r.refs[to_set]
            r.refs[to_set] = parse_commit(r, target).id
        else:
            r.refs.set_symbolic_ref(to_set, parse_ref(r, target))
        if new_branch is not None:
            r.refs.set_symbolic_ref(b"HEAD", to_set)


def check_mailmap(repo, contact):
    """Check canonical name and email of contact.

    Args:
      repo: Path to the repository
      contact: Contact name and/or email
    Returns:
      Canonical contact data
    """
    with open_repo_closing(repo) as r:
        from dulwich.mailmap import Mailmap
        try:
            mailmap = Mailmap.from_path(os.path.join(r.path, ".mailmap"))
        except FileNotFoundError:
            mailmap = Mailmap()
        return mailmap.lookup(contact)


def fsck(repo):
    """Check a repository.
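# Example (illustrative only, not part of this patch): listing a tree, querying
# ignore rules and moving HEAD with the helpers above; paths and the branch name
# are hypothetical.
from dulwich import porcelain

porcelain.ls_tree("./myrepo", b"HEAD", recursive=True)               # prints to stdout
print(list(porcelain.check_ignore("./myrepo", ["build/output.o"])))
porcelain.update_head("./myrepo", "develop")                          # leaves the worktree alone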
    Args:
      repo: A path to the repository
    Returns:
      Iterator over errors/warnings
    """
    with open_repo_closing(repo) as r:
        # TODO(jelmer): check pack files
        # TODO(jelmer): check graph
        # TODO(jelmer): check refs
        for sha in r.object_store:
            o = r.object_store[sha]
            try:
                o.check()
            except Exception as e:
                yield (sha, e)


def stash_list(repo):
    """List all stashes in a repository."""
    with open_repo_closing(repo) as r:
        from dulwich.stash import Stash
        stash = Stash.from_repo(r)
        return enumerate(list(stash.stashes()))


def stash_push(repo):
    """Push a new stash onto the stack."""
    with open_repo_closing(repo) as r:
        from dulwich.stash import Stash
        stash = Stash.from_repo(r)
        stash.push()


def stash_pop(repo):
    """Pop a new stash from the stack."""
    with open_repo_closing(repo) as r:
        from dulwich.stash import Stash
        stash = Stash.from_repo(r)
        stash.pop()


def ls_files(repo):
    """List all files in an index."""
    with open_repo_closing(repo) as r:
        return sorted(r.open_index())


def describe(repo):
    """Describe the repository version.

    Args:
      repo: git repository root
    Returns:
      a string description of the current git revision

    Examples: "gabcdefh", "v0.1" or "v0.1-5-gabcdefh".
    """
    # Get the repository
    with open_repo_closing(repo) as r:
        # Get a list of all tags
        refs = r.get_refs()
        tags = {}
        for key, value in refs.items():
            key = key.decode()
            obj = r.get_object(value)
            if u"tags" not in key:
                continue

            _, tag = key.rsplit(u"/", 1)

            try:
                commit = obj.object
            except AttributeError:
                continue
            else:
                commit = r.get_object(commit[1])
            tags[tag] = [
                datetime.datetime(*time.gmtime(commit.commit_time)[:6]),
                commit.id.decode("ascii"),
            ]

        sorted_tags = sorted(tags.items(), key=lambda tag: tag[1][0], reverse=True)

        # If there are no tags, return the current commit
        if len(sorted_tags) == 0:
            return "g{}".format(r[r.head()].id.decode("ascii")[:7])

        # We're now 0 commits from the top
        commit_count = 0

        # Get the latest commit
        latest_commit = r[r.head()]

        # Walk through all commits
        walker = r.get_walker()
        for entry in walker:
            # Check if tag
            commit_id = entry.commit.id.decode("ascii")
            for tag in sorted_tags:
                tag_name = tag[0]
                tag_commit = tag[1][1]
                if commit_id == tag_commit:
                    if commit_count == 0:
                        return tag_name
                    else:
                        return "{}-{}-g{}".format(
                            tag_name,
                            commit_count,
                            latest_commit.id.decode("ascii")[:7],
                        )

            commit_count += 1

        # Return plain commit if no parent tag can be found
        return "g{}".format(latest_commit.id.decode("ascii")[:7])


def get_object_by_path(repo, path, committish=None):
    """Get an object by path.

    Args:
      repo: A path to the repository
      path: Path to look up
      committish: Commit to look up path in
    Returns:
      A `ShaFile` object
    """
    if committish is None:
        committish = "HEAD"
    # Get the repository
    with open_repo_closing(repo) as r:
        commit = parse_commit(r, committish)
        base_tree = commit.tree
        if not isinstance(path, bytes):
            path = commit_encode(commit, path)
        (mode, sha) = tree_lookup_path(r.object_store.__getitem__, base_tree, path)
        return r[sha]


def write_tree(repo):
    """Write a tree object from the index.
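# Example (illustrative only, not part of this patch): describe, path lookup and
# tree writing with the helpers above; the repository path and file name are
# hypothetical.
from dulwich import porcelain

print(porcelain.describe("./myrepo"))                   # e.g. "v0.2-3-g1a2b3c4"
blob = porcelain.get_object_by_path("./myrepo", "README.md")
print(blob.data[:40])
print(porcelain.write_tree("./myrepo"))                 # id of the tree built from the index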
Args: repo: Repository for which to write tree Returns: tree id for the tree that was written """ with open_repo_closing(repo) as r: return r.open_index().commit(r.object_store) diff --git a/dulwich/tests/test_index.py b/dulwich/tests/test_index.py index 9a91cb93..a187bd26 100644 --- a/dulwich/tests/test_index.py +++ b/dulwich/tests/test_index.py @@ -1,841 +1,841 @@ # -*- coding: utf-8 -*- # test_index.py -- Tests for the git index # encoding: utf-8 # Copyright (C) 2008-2009 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Tests for the index.""" from io import BytesIO import os import shutil import stat import struct import sys import tempfile import warnings from dulwich.index import ( Index, build_index_from_tree, cleanup_mode, commit_tree, get_unstaged_changes, index_entry_from_stat, read_index, read_index_dict, validate_path_element_default, validate_path_element_ntfs, write_cache_time, write_index, write_index_dict, _tree_to_fs_path, _fs_to_tree_path, IndexEntry, ) from dulwich.object_store import ( MemoryObjectStore, ) from dulwich.objects import ( Blob, Commit, Tree, S_IFGITLINK, ) from dulwich.repo import Repo from dulwich.tests import ( TestCase, skipIf, ) from dulwich.tests.utils import ( setup_warning_catcher, ) def can_symlink(): """Return whether running process can create symlinks.""" if sys.platform != "win32": # Platforms other than Windows should allow symlinks without issues. return True if not hasattr(os, "symlink"): # Older Python versions do not have `os.symlink` on Windows. 
return False test_source = tempfile.mkdtemp() test_target = test_source + "can_symlink" try: os.symlink(test_source, test_target) except (NotImplementedError, OSError): return False return True class IndexTestCase(TestCase): datadir = os.path.join(os.path.dirname(__file__), "data/indexes") def get_simple_index(self, name): return Index(os.path.join(self.datadir, name)) class SimpleIndexTestCase(IndexTestCase): def test_len(self): self.assertEqual(1, len(self.get_simple_index("index"))) def test_iter(self): self.assertEqual([b"bla"], list(self.get_simple_index("index"))) def test_iterobjects(self): self.assertEqual( [(b"bla", b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 33188)], list(self.get_simple_index("index").iterobjects()), ) def test_iterblobs(self): warnings.simplefilter("always", UserWarning) self.addCleanup(warnings.resetwarnings) warnings_list, restore_warnings = setup_warning_catcher() self.addCleanup(restore_warnings) self.assertEqual( [(b"bla", b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 33188)], list(self.get_simple_index("index").iterblobs()), ) expected_warning = PendingDeprecationWarning("Use iterobjects() instead.") for w in warnings_list: if type(w) == type(expected_warning) and w.args == expected_warning.args: break else: raise AssertionError( "Expected warning %r not in %r" % (expected_warning, warnings_list) ) def test_getitem(self): self.assertEqual( ( (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 0, 0, ), self.get_simple_index("index")[b"bla"], ) def test_empty(self): i = self.get_simple_index("notanindex") self.assertEqual(0, len(i)) self.assertFalse(os.path.exists(i._filename)) def test_against_empty_tree(self): i = self.get_simple_index("index") changes = list(i.changes_from_tree(MemoryObjectStore(), None)) self.assertEqual(1, len(changes)) (oldname, newname), (oldmode, newmode), (oldsha, newsha) = changes[0] self.assertEqual(b"bla", newname) self.assertEqual(b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", newsha) class SimpleIndexWriterTestCase(IndexTestCase): def setUp(self): IndexTestCase.setUp(self) self.tempdir = tempfile.mkdtemp() def tearDown(self): IndexTestCase.tearDown(self) shutil.rmtree(self.tempdir) def test_simple_write(self): entries = [ ( b"barbla", IndexEntry( (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 0, 0) ) ] filename = os.path.join(self.tempdir, "test-simple-write-index") with open(filename, "wb+") as x: write_index(x, entries) with open(filename, "rb") as x: self.assertEqual(entries, list(read_index(x))) class ReadIndexDictTests(IndexTestCase): def setUp(self): IndexTestCase.setUp(self) self.tempdir = tempfile.mkdtemp() def tearDown(self): IndexTestCase.tearDown(self) shutil.rmtree(self.tempdir) def test_simple_write(self): entries = { b"barbla": IndexEntry( (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 0, 0, ) } filename = os.path.join(self.tempdir, "test-simple-write-index") with open(filename, "wb+") as x: write_index_dict(x, entries) with open(filename, "rb") as x: self.assertEqual(entries, read_index_dict(x)) class CommitTreeTests(TestCase): def setUp(self): super(CommitTreeTests, self).setUp() self.store = MemoryObjectStore() def test_single_blob(self): blob = Blob() blob.data = b"foo" self.store.add_object(blob) blobs = [(b"bla", blob.id, stat.S_IFREG)] rootid = commit_tree(self.store, blobs) self.assertEqual(rootid, 
b"1a1e80437220f9312e855c37ac4398b68e5c1d50") self.assertEqual((stat.S_IFREG, blob.id), self.store[rootid][b"bla"]) self.assertEqual(set([rootid, blob.id]), set(self.store._data.keys())) def test_nested(self): blob = Blob() blob.data = b"foo" self.store.add_object(blob) blobs = [(b"bla/bar", blob.id, stat.S_IFREG)] rootid = commit_tree(self.store, blobs) self.assertEqual(rootid, b"d92b959b216ad0d044671981196781b3258fa537") dirid = self.store[rootid][b"bla"][1] self.assertEqual(dirid, b"c1a1deb9788150829579a8b4efa6311e7b638650") self.assertEqual((stat.S_IFDIR, dirid), self.store[rootid][b"bla"]) self.assertEqual((stat.S_IFREG, blob.id), self.store[dirid][b"bar"]) self.assertEqual(set([rootid, dirid, blob.id]), set(self.store._data.keys())) class CleanupModeTests(TestCase): def assertModeEqual(self, expected, got): self.assertEqual(expected, got, "%o != %o" % (expected, got)) def test_file(self): self.assertModeEqual(0o100644, cleanup_mode(0o100000)) def test_executable(self): self.assertModeEqual(0o100755, cleanup_mode(0o100711)) self.assertModeEqual(0o100755, cleanup_mode(0o100700)) def test_symlink(self): self.assertModeEqual(0o120000, cleanup_mode(0o120711)) def test_dir(self): self.assertModeEqual(0o040000, cleanup_mode(0o40531)) def test_submodule(self): self.assertModeEqual(0o160000, cleanup_mode(0o160744)) class WriteCacheTimeTests(TestCase): def test_write_string(self): f = BytesIO() self.assertRaises(TypeError, write_cache_time, f, "foo") def test_write_int(self): f = BytesIO() write_cache_time(f, 434343) self.assertEqual(struct.pack(">LL", 434343, 0), f.getvalue()) def test_write_tuple(self): f = BytesIO() write_cache_time(f, (434343, 21)) self.assertEqual(struct.pack(">LL", 434343, 21), f.getvalue()) def test_write_float(self): f = BytesIO() write_cache_time(f, 434343.000000021) self.assertEqual(struct.pack(">LL", 434343, 21), f.getvalue()) class IndexEntryFromStatTests(TestCase): def test_simple(self): st = os.stat_result( ( 16877, 131078, 64769, 154, 1000, 1000, 12288, 1323629595, 1324180496, 1324180496, ) ) entry = index_entry_from_stat(st, "22" * 20, 0) self.assertEqual( entry, IndexEntry( 1324180496, 1324180496, 64769, 131078, 16384, 1000, 1000, 12288, "2222222222222222222222222222222222222222", 0, None, ), ) def test_override_mode(self): st = os.stat_result( ( stat.S_IFREG + 0o644, 131078, 64769, 154, 1000, 1000, 12288, 1323629595, 1324180496, 1324180496, ) ) entry = index_entry_from_stat(st, "22" * 20, 0, mode=stat.S_IFREG + 0o755) self.assertEqual( entry, IndexEntry( 1324180496, 1324180496, 64769, 131078, 33261, 1000, 1000, 12288, "2222222222222222222222222222222222222222", 0, None, ), ) class BuildIndexTests(TestCase): def assertReasonableIndexEntry(self, index_entry, mode, filesize, sha): self.assertEqual(index_entry[4], mode) # mode self.assertEqual(index_entry[7], filesize) # filesize self.assertEqual(index_entry[8], sha) # sha def assertFileContents(self, path, contents, symlink=False): if symlink: self.assertEqual(os.readlink(path), contents) else: with open(path, "rb") as f: self.assertEqual(f.read(), contents) def test_empty(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: tree = Tree() repo.object_store.add_object(tree) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() self.assertEqual(len(index), 0) # Verify no files self.assertEqual([".git"], os.listdir(repo.path)) def test_git_dir(self): repo_dir = tempfile.mkdtemp() 
self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo filea = Blob.from_string(b"file a") filee = Blob.from_string(b"d") tree = Tree() tree[b".git/a"] = (stat.S_IFREG | 0o644, filea.id) tree[b"c/e"] = (stat.S_IFREG | 0o644, filee.id) repo.object_store.add_objects([(o, None) for o in [filea, filee, tree]]) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() self.assertEqual(len(index), 1) # filea apath = os.path.join(repo.path, ".git", "a") self.assertFalse(os.path.exists(apath)) # filee epath = os.path.join(repo.path, "c", "e") self.assertTrue(os.path.exists(epath)) self.assertReasonableIndexEntry( index[b"c/e"], stat.S_IFREG | 0o644, 1, filee.id ) self.assertFileContents(epath, b"d") def test_nonempty(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo filea = Blob.from_string(b"file a") fileb = Blob.from_string(b"file b") filed = Blob.from_string(b"file d") tree = Tree() tree[b"a"] = (stat.S_IFREG | 0o644, filea.id) tree[b"b"] = (stat.S_IFREG | 0o644, fileb.id) tree[b"c/d"] = (stat.S_IFREG | 0o644, filed.id) repo.object_store.add_objects( [(o, None) for o in [filea, fileb, filed, tree]] ) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() self.assertEqual(len(index), 3) # filea apath = os.path.join(repo.path, "a") self.assertTrue(os.path.exists(apath)) self.assertReasonableIndexEntry( index[b"a"], stat.S_IFREG | 0o644, 6, filea.id ) self.assertFileContents(apath, b"file a") # fileb bpath = os.path.join(repo.path, "b") self.assertTrue(os.path.exists(bpath)) self.assertReasonableIndexEntry( index[b"b"], stat.S_IFREG | 0o644, 6, fileb.id ) self.assertFileContents(bpath, b"file b") # filed dpath = os.path.join(repo.path, "c", "d") self.assertTrue(os.path.exists(dpath)) self.assertReasonableIndexEntry( index[b"c/d"], stat.S_IFREG | 0o644, 6, filed.id ) self.assertFileContents(dpath, b"file d") # Verify no extra files self.assertEqual([".git", "a", "b", "c"], sorted(os.listdir(repo.path))) self.assertEqual(["d"], sorted(os.listdir(os.path.join(repo.path, "c")))) @skipIf(not getattr(os, "sync", None), "Requires sync support") def test_norewrite(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo filea = Blob.from_string(b"file a") filea_path = os.path.join(repo_dir, "a") tree = Tree() tree[b"a"] = (stat.S_IFREG | 0o644, filea.id) repo.object_store.add_objects([(o, None) for o in [filea, tree]]) # First Write build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Use sync as metadata can be cached on some FS os.sync() mtime = os.stat(filea_path).st_mtime # Test Rewrite build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) os.sync() self.assertEqual(mtime, os.stat(filea_path).st_mtime) # Modify content with open(filea_path, "wb") as fh: fh.write(b"test a") os.sync() mtime = os.stat(filea_path).st_mtime # Test rewrite build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) os.sync() with open(filea_path, "rb") as fh: self.assertEqual(b"file a", fh.read()) @skipIf(not can_symlink(), "Requires symlink support") def test_symlink(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo filed = 
Blob.from_string(b"file d") filee = Blob.from_string(b"d") tree = Tree() tree[b"c/d"] = (stat.S_IFREG | 0o644, filed.id) tree[b"c/e"] = (stat.S_IFLNK, filee.id) # symlink repo.object_store.add_objects([(o, None) for o in [filed, filee, tree]]) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() # symlink to d epath = os.path.join(repo.path, "c", "e") self.assertTrue(os.path.exists(epath)) self.assertReasonableIndexEntry( index[b"c/e"], stat.S_IFLNK, 0 if sys.platform == "win32" else 1, filee.id, ) self.assertFileContents(epath, "d", symlink=True) def test_no_decode_encode(self): repo_dir = tempfile.mkdtemp() repo_dir_bytes = os.fsencode(repo_dir) self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Populate repo file = Blob.from_string(b"foo") tree = Tree() latin1_name = u"À".encode("latin1") latin1_path = os.path.join(repo_dir_bytes, latin1_name) utf8_name = u"À".encode("utf8") utf8_path = os.path.join(repo_dir_bytes, utf8_name) tree[latin1_name] = (stat.S_IFREG | 0o644, file.id) tree[utf8_name] = (stat.S_IFREG | 0o644, file.id) repo.object_store.add_objects([(o, None) for o in [file, tree]]) try: build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) except OSError as e: if e.errno == 92 and sys.platform == "darwin": # Our filename isn't supported by the platform :( self.skipTest("can not write filename %r" % e.filename) else: raise except UnicodeDecodeError: # This happens e.g. with python3.6 on Windows. # It implicitly decodes using utf8, which doesn't work. self.skipTest("can not implicitly convert as utf8") # Verify index entries index = repo.open_index() self.assertIn(latin1_name, index) self.assertIn(utf8_name, index) self.assertTrue(os.path.exists(latin1_path)) self.assertTrue(os.path.exists(utf8_path)) def test_git_submodule(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: filea = Blob.from_string(b"file alalala") subtree = Tree() subtree[b"a"] = (stat.S_IFREG | 0o644, filea.id) c = Commit() c.tree = subtree.id c.committer = c.author = b"Somebody " c.commit_time = c.author_time = 42342 c.commit_timezone = c.author_timezone = 0 c.parents = [] c.message = b"Subcommit" tree = Tree() tree[b"c"] = (S_IFGITLINK, c.id) repo.object_store.add_objects([(o, None) for o in [tree]]) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = repo.open_index() self.assertEqual(len(index), 1) # filea apath = os.path.join(repo.path, "c/a") self.assertFalse(os.path.exists(apath)) # dir c cpath = os.path.join(repo.path, "c") self.assertTrue(os.path.isdir(cpath)) self.assertEqual(index[b"c"][4], S_IFGITLINK) # mode self.assertEqual(index[b"c"][8], c.id) # sha def test_git_submodule_exists(self): repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: filea = Blob.from_string(b"file alalala") subtree = Tree() subtree[b"a"] = (stat.S_IFREG | 0o644, filea.id) c = Commit() c.tree = subtree.id c.committer = c.author = b"Somebody " c.commit_time = c.author_time = 42342 c.commit_timezone = c.author_timezone = 0 c.parents = [] c.message = b"Subcommit" tree = Tree() tree[b"c"] = (S_IFGITLINK, c.id) os.mkdir(os.path.join(repo_dir, "c")) repo.object_store.add_objects([(o, None) for o in [tree]]) build_index_from_tree( repo.path, repo.index_path(), repo.object_store, tree.id ) # Verify index entries index = 
repo.open_index() self.assertEqual(len(index), 1) # filea apath = os.path.join(repo.path, "c/a") self.assertFalse(os.path.exists(apath)) # dir c cpath = os.path.join(repo.path, "c") self.assertTrue(os.path.isdir(cpath)) self.assertEqual(index[b"c"][4], S_IFGITLINK) # mode self.assertEqual(index[b"c"][8], c.id) # sha class GetUnstagedChangesTests(TestCase): def test_get_unstaged_changes(self): """Unit test for get_unstaged_changes.""" repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Commit a dummy file then modify it foo1_fullpath = os.path.join(repo_dir, "foo1") with open(foo1_fullpath, "wb") as f: f.write(b"origstuff") foo2_fullpath = os.path.join(repo_dir, "foo2") with open(foo2_fullpath, "wb") as f: f.write(b"origstuff") repo.stage(["foo1", "foo2"]) repo.do_commit( b"test status", author=b"author ", committer=b"committer ", ) with open(foo1_fullpath, "wb") as f: f.write(b"newstuff") # modify access and modify time of path os.utime(foo1_fullpath, (0, 0)) - changes = get_unstaged_changes(repo.open_index(), repo) + changes = get_unstaged_changes(repo.open_index(), repo_dir) self.assertEqual(list(changes), [b"foo1"]) def test_get_unstaged_deleted_changes(self): """Unit test for get_unstaged_changes.""" repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Commit a dummy file then remove it foo1_fullpath = os.path.join(repo_dir, "foo1") with open(foo1_fullpath, "wb") as f: f.write(b"origstuff") repo.stage(["foo1"]) repo.do_commit( b"test status", author=b"author ", committer=b"committer ", ) os.unlink(foo1_fullpath) - changes = get_unstaged_changes(repo.open_index(), repo) + changes = get_unstaged_changes(repo.open_index(), repo_dir) self.assertEqual(list(changes), [b"foo1"]) def test_get_unstaged_changes_removed_replaced_by_directory(self): """Unit test for get_unstaged_changes.""" repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Commit a dummy file then modify it foo1_fullpath = os.path.join(repo_dir, "foo1") with open(foo1_fullpath, "wb") as f: f.write(b"origstuff") repo.stage(["foo1"]) repo.do_commit( b"test status", author=b"author ", committer=b"committer ", ) os.remove(foo1_fullpath) os.mkdir(foo1_fullpath) - changes = get_unstaged_changes(repo.open_index(), repo) + changes = get_unstaged_changes(repo.open_index(), repo_dir) self.assertEqual(list(changes), [b"foo1"]) @skipIf(not can_symlink(), "Requires symlink support") def test_get_unstaged_changes_removed_replaced_by_link(self): """Unit test for get_unstaged_changes.""" repo_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, repo_dir) with Repo.init(repo_dir) as repo: # Commit a dummy file then modify it foo1_fullpath = os.path.join(repo_dir, "foo1") with open(foo1_fullpath, "wb") as f: f.write(b"origstuff") repo.stage(["foo1"]) repo.do_commit( b"test status", author=b"author ", committer=b"committer ", ) os.remove(foo1_fullpath) os.symlink(os.path.dirname(foo1_fullpath), foo1_fullpath) - changes = get_unstaged_changes(repo.open_index(), repo) + changes = get_unstaged_changes(repo.open_index(), repo_dir) self.assertEqual(list(changes), [b"foo1"]) class TestValidatePathElement(TestCase): def test_default(self): self.assertTrue(validate_path_element_default(b"bla")) self.assertTrue(validate_path_element_default(b".bla")) self.assertFalse(validate_path_element_default(b".git")) self.assertFalse(validate_path_element_default(b".giT")) 
self.assertFalse(validate_path_element_default(b"..")) self.assertTrue(validate_path_element_default(b"git~1")) def test_ntfs(self): self.assertTrue(validate_path_element_ntfs(b"bla")) self.assertTrue(validate_path_element_ntfs(b".bla")) self.assertFalse(validate_path_element_ntfs(b".git")) self.assertFalse(validate_path_element_ntfs(b".giT")) self.assertFalse(validate_path_element_ntfs(b"..")) self.assertFalse(validate_path_element_ntfs(b"git~1")) class TestTreeFSPathConversion(TestCase): def test_tree_to_fs_path(self): tree_path = u"délwíçh/foo".encode("utf8") fs_path = _tree_to_fs_path(b"/prefix/path", tree_path) self.assertEqual( fs_path, os.fsencode(os.path.join(u"/prefix/path", u"délwíçh", u"foo")), ) def test_fs_to_tree_path_str(self): fs_path = os.path.join(os.path.join(u"délwíçh", u"foo")) tree_path = _fs_to_tree_path(fs_path) self.assertEqual(tree_path, u"délwíçh/foo".encode("utf-8")) def test_fs_to_tree_path_bytes(self): fs_path = os.path.join(os.fsencode(os.path.join(u"délwíçh", u"foo"))) tree_path = _fs_to_tree_path(fs_path) self.assertEqual(tree_path, u"délwíçh/foo".encode("utf-8"))
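# Example (illustrative only, not part of this patch): the call shape these tests
# exercise after the change above -- get_unstaged_changes() is given the
# working-tree path, not a Repo object; "./myrepo" is a hypothetical path.
from dulwich.index import get_unstaged_changes
from dulwich.repo import Repo

r = Repo("./myrepo")
modified = list(get_unstaged_changes(r.open_index(), r.path))
print(modified)  # tree paths (bytes) whose working-tree contents differ from the index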