diff --git a/dulwich/clone.py b/dulwich/clone.py
new file mode 100644
index 00000000..b9b25a9e
--- /dev/null
+++ b/dulwich/clone.py
@@ -0,0 +1,179 @@
+# clone.py
+# Copyright (C) 2021 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation;
+# version 2.0 or (at your option) any later version. You can redistribute it
+# and/or modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Repository clone handling."""
+
+import os
+from typing import TYPE_CHECKING, Callable, Tuple
+
+from dulwich.objects import (
+    Tag,
+)
+from dulwich.refs import (
+    LOCAL_BRANCH_PREFIX,
+    LOCAL_TAG_PREFIX,
+)
+
+if TYPE_CHECKING:
+    from dulwich.repo import Repo
+
+
+def do_clone(
+    source_path,
+    target_path,
+    clone_refs: Callable[["Repo", bytes], Tuple[bytes, bytes]] = None,
+    mkdir=True,
+    bare=False,
+    origin=b"origin",
+    checkout=None,
+    errstream=None,
+    branch=None,
+):
+    """Create a new repository at target_path as a clone of source_path.
+
+    Args:
+      source_path: Location of the source repository (bytes); recorded as
+        the remote URL and quoted in the reflog message
+      target_path: Path at which the new repository is initialised
+      clone_refs: Required callback, invoked as clone_refs(target, message);
+        returns an (origin_head, origin_sha) pair
+      mkdir: Create the target directory (forwarded to Repo.init/init_bare)
+      bare: Create a bare repository; cannot be combined with checkout
+      origin: Remote name (bytes) used for the config section and as the
+        refs/remotes/<origin>/ namespace
+      checkout: Reset the index once HEAD is set; when None it defaults to
+        True for a non-bare clone
+      errstream: Optional binary stream that receives the checkout notice
+      branch: Optional branch or tag to use as HEAD in the new repository
+        instead of the source repository's HEAD
+    Returns: The newly created `Repo`
+    Raises:
+      ValueError: clone_refs is missing, or both bare and checkout are set
+    """
+    from dulwich.repo import Repo
+
+    if not clone_refs:
+        raise ValueError("clone_refs callback is required")
+
+    if bare:
+        if checkout:
+            raise ValueError("checkout and bare are incompatible")
+        target = Repo.init_bare(target_path, mkdir=mkdir)
+    else:
+        target = Repo.init(target_path, mkdir=mkdir)
+        if checkout is None:
+            checkout = True
+
+    try:
+        config = target.get_config()
+        remote_section = (b"remote", origin)
+        config.set(remote_section, b"url", source_path)
+        fetch_spec = b"+refs/heads/*:refs/remotes/" + origin + b"/*"
+        config.set(remote_section, b"fetch", fetch_spec)
+        config.write_to_path()
+
+        ref_message = b"clone: from " + source_path
+        origin_head, origin_sha = clone_refs(target, ref_message)
+        if origin_sha and not origin_head:
+            # a SHA but no ref name came back: leave HEAD detached at it
+            target.refs[b"HEAD"] = origin_sha
+
+        _set_origin_head(target, origin, origin_head)
+        head_ref = _set_default_branch(
+            target, origin, origin_head, branch, ref_message
+        )
+
+        # HEAD is only moved when a default ref was determined
+        head = _set_head(target, head_ref, ref_message) if head_ref else None
+
+        if checkout and head is not None:
+            if errstream:
+                errstream.write(b"Checking out " + head + b"\n")
+            target.reset_index()
+    except BaseException:
+        target.close()
+        raise
+
+    return target
+
+
+def _set_origin_head(r, origin, origin_head):
+    """Point refs/remotes/<origin>/HEAD at the remote's HEAD branch."""
+    remote_prefix = b"refs/remotes/" + origin + b"/"
+    if not origin_head or not origin_head.startswith(LOCAL_BRANCH_PREFIX):
+        return
+    branch_ref = remote_prefix + origin_head[len(LOCAL_BRANCH_PREFIX) :]
+    if branch_ref in r.refs:
+        r.refs.set_symbolic_ref(remote_prefix + b"HEAD", branch_ref)
+
+
+def _set_default_branch(r, origin, origin_head, branch, ref_message):
+    """Set up the local ref for HEAD; return its name, or None if none."""
+    origin_base = b"refs/remotes/" + origin + b"/"
+    head_ref = None  # returned as-is when neither branch nor origin_head
+    if branch:
+        origin_ref = origin_base + branch
+        if origin_ref in r.refs:
+            head_ref = LOCAL_BRANCH_PREFIX + branch
+            sha = r.refs[origin_ref]
+            r.refs.add_if_new(head_ref, sha, message=ref_message)
+        elif LOCAL_TAG_PREFIX + branch in r.refs:
+            head_ref = LOCAL_TAG_PREFIX + branch
+        else:
+            # branch is bytes; decode it so the message shows no b'...' repr
+            raise ValueError(
+                "%s is not a valid branch or tag" % os.fsdecode(branch)
+            )
+    elif origin_head:
+        head_ref = origin_head
+        if origin_head.startswith(LOCAL_BRANCH_PREFIX):
+            origin_ref = origin_base + origin_head[len(LOCAL_BRANCH_PREFIX) :]
+        else:
+            origin_ref = origin_head
+        try:
+            sha = r.refs[origin_ref]
+            r.refs.add_if_new(head_ref, sha, message=ref_message)
+        except KeyError:
+            pass  # origin_ref is missing; head_ref is left uncreated
+    return head_ref
+
+
+def _set_head(r, head_ref, ref_message):
+    """Point HEAD at head_ref; return the resolved SHA, or None if missing."""
+    if head_ref.startswith(LOCAL_TAG_PREFIX):
+        # Detach HEAD at the tag. refs hold SHAs, so load the object and
+        # peel annotated tags down to the object they ultimately point at.
+        head_obj = r[r.refs[head_ref]]
+        while isinstance(head_obj, Tag):
+            head_obj = r[head_obj.object[1]]
+        head = head_obj.id
+        del r.refs[b"HEAD"]
+        r.refs.set_if_equals(b"HEAD", None, head, message=ref_message)
+    else:
+        # Attach HEAD to the branch; a missing branch ref yields None.
+        try:
+            head = r.refs[head_ref]
+            r.refs.set_symbolic_ref(b"HEAD", head_ref)
+            r.refs.set_if_equals(
+                b"HEAD", None, head, message=ref_message
+            )
+        except KeyError:
+            head = None
+    return head
diff --git a/dulwich/porcelain.py b/dulwich/porcelain.py
index 1af6f221..80017f10 100644
--- a/dulwich/porcelain.py
+++ b/dulwich/porcelain.py
@@ -1,1936 +1,1939 @@
 # porcelain.py -- Porcelain-like layer on top of Dulwich
 # Copyright (C) 2013 Jelmer Vernooij <jelmer@jelmer.uk>
 #
 # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
 # General Public License as public by the Free Software Foundation; version 2.0
 # or (at your option) any later version. You can redistribute it and/or
 # modify it under the terms of either of these two licenses.
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # You should have received a copy of the licenses; if not, see
 # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
 # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
 # License, Version 2.0.
 #
 
 """Simple wrapper that provides porcelain-like functions on top of Dulwich.
 
 Currently implemented:
  * archive
  * add
  * branch{_create,_delete,_list}
  * check-ignore
  * checkout
  * clone
  * commit
  * commit-tree
  * daemon
  * describe
  * diff-tree
  * fetch
  * init
  * ls-files
  * ls-remote
  * ls-tree
  * pull
  * push
  * rm
  * remote{_add}
  * receive-pack
  * reset
  * rev-list
  * tag{_create,_delete,_list}
  * upload-pack
  * update-server-info
  * status
  * symbolic-ref
 
 These functions are meant to behave similarly to the git subcommands.
 Differences in behaviour are considered bugs.
 
 Note: one of the consequences of this is that paths tend to be
 interpreted relative to the current working directory rather than relative
 to the repository root.
 
 Functions should generally accept both unicode strings and bytestrings
 """
 
 from collections import namedtuple
 from contextlib import (
     closing,
     contextmanager,
 )
 from io import BytesIO, RawIOBase
 import datetime
 import os
 from pathlib import Path
 import posixpath
 import shutil
 import stat
 import sys
 import time
 from typing import (
     Dict,
     Optional,
     Tuple,
     Union,
 )
 
 from dulwich.archive import (
     tar_stream,
 )
 from dulwich.client import (
     get_transport_and_path,
 )
+from dulwich.clone import (
+    do_clone,
+)
 from dulwich.config import (
     StackedConfig,
 )
 from dulwich.diff_tree import (
     CHANGE_ADD,
     CHANGE_DELETE,
     CHANGE_MODIFY,
     CHANGE_RENAME,
     CHANGE_COPY,
     RENAME_CHANGE_TYPES,
 )
 from dulwich.errors import (
     SendPackError,
 )
 from dulwich.graph import (
     can_fast_forward,
 )
 from dulwich.ignore import IgnoreFilterManager
 from dulwich.index import (
     blob_from_path_and_stat,
     get_unstaged_changes,
     build_file_from_blob,
     _fs_to_tree_path,
 )
 from dulwich.object_store import (
     tree_lookup_path,
 )
 from dulwich.objects import (
     Commit,
     Tag,
     format_timezone,
     parse_timezone,
     pretty_format_tree_entry,
 )
 from dulwich.objectspec import (
     parse_commit,
     parse_object,
     parse_ref,
     parse_reftuples,
     parse_tree,
 )
 from dulwich.pack import (
     write_pack_index,
     write_pack_objects,
 )
 from dulwich.patch import write_tree_diff
 from dulwich.protocol import (
     Protocol,
     ZERO_SHA,
 )
 from dulwich.refs import (
     ANNOTATED_TAG_SUFFIX,
     LOCAL_BRANCH_PREFIX,
     LOCAL_TAG_PREFIX,
     strip_peeled_refs,
     RefsContainer,
 )
 from dulwich.repo import BaseRepo, Repo
 from dulwich.server import (
     FileSystemBackend,
     TCPGitServer,
     ReceivePackHandler,
     UploadPackHandler,
     update_server_info as server_update_server_info,
 )
 
 
 # Module level tuple definition for status output
 GitStatus = namedtuple("GitStatus", "staged unstaged untracked")
 
 
 class NoneStream(RawIOBase):
     """Fallback if stdout or stderr are unavailable, does nothing."""
 
     def read(self, size=-1):
         return None
 
     def readall(self):
         return None
 
     def readinto(self, b):
         return None
 
     def write(self, b):
         return None
 
 
 default_bytes_out_stream = getattr(sys.stdout, "buffer", None) or NoneStream()
 default_bytes_err_stream = getattr(sys.stderr, "buffer", None) or NoneStream()
 
 
 DEFAULT_ENCODING = "utf-8"
 
 
 class Error(Exception):
     """Porcelain-based error. """
 
     def __init__(self, msg, inner=None):
         super(Error, self).__init__(msg)
         self.inner = inner
 
 
 class RemoteExists(Error):
     """Raised when the remote already exists."""
 
 
 def open_repo(path_or_repo):
     """Open an argument that can be a repository or a path for a repository."""
     if isinstance(path_or_repo, BaseRepo):
         return path_or_repo
     return Repo(path_or_repo)
 
 
 @contextmanager
 def _noop_context_manager(obj):
     """Context manager that has the same api as closing but does nothing."""
     yield obj
 
 
 def open_repo_closing(path_or_repo):
     """Open an argument that can be a repository or a path for a repository.
     returns a context manager that will close the repo on exit if the argument
     is a path, else does nothing if the argument is a repo.
     """
     if isinstance(path_or_repo, BaseRepo):
         return _noop_context_manager(path_or_repo)
     return closing(Repo(path_or_repo))
 
 
 def path_to_tree_path(repopath, path, tree_encoding=DEFAULT_ENCODING):
     """Convert a path to a path usable in an index, e.g. bytes and relative to
     the repository root.
 
     Args:
       repopath: Repository path, absolute or relative to the cwd
       path: A path, absolute or relative to the cwd
     Returns: A path formatted for use in e.g. an index
     """
     # Resolve might returns a relative path on Windows
     # https://bugs.python.org/issue38671
     if sys.platform == "win32":
         path = os.path.abspath(path)
 
     path = Path(path)
     resolved_path = path.resolve()
 
     # Resolve and abspath seems to behave differently regarding symlinks,
     # as we are doing abspath on the file path, we need to do the same on
     # the repo path or they might not match
     if sys.platform == "win32":
         repopath = os.path.abspath(repopath)
 
     repopath = Path(repopath).resolve()
 
     try:
         relpath = resolved_path.relative_to(repopath)
     except ValueError:
         # If path is a symlink that points to a file outside the repo, we
         # want the relpath for the link itself, not the resolved target
         if path.is_symlink():
             parent = path.parent.resolve()
             relpath = (parent / path.name).relative_to(repopath)
         else:
             raise
     if sys.platform == "win32":
         return str(relpath).replace(os.path.sep, "/").encode(tree_encoding)
     else:
         return bytes(relpath)
 
 
 class DivergedBranches(Error):
     """Branches have diverged and fast-forward is not possible."""
 
 
 def check_diverged(repo, current_sha, new_sha):
     """Check if updating to a sha can be done with fast forwarding.
 
     Args:
       repo: Repository object
       current_sha: Current head sha
       new_sha: New head sha
     """
     try:
         can = can_fast_forward(repo, current_sha, new_sha)
     except KeyError:
         can = False
     if not can:
         raise DivergedBranches(current_sha, new_sha)
 
 
 def archive(
     repo,
     committish=None,
     outstream=default_bytes_out_stream,
     errstream=default_bytes_err_stream,
 ):
     """Create an archive.
 
     Args:
       repo: Path of repository for which to generate an archive.
       committish: Commit SHA1 or ref to use
       outstream: Output stream (defaults to stdout)
       errstream: Error stream (defaults to stderr)
     """
 
     if committish is None:
         committish = "HEAD"
     with open_repo_closing(repo) as repo_obj:
         c = parse_commit(repo_obj, committish)
         for chunk in tar_stream(
             repo_obj.object_store, repo_obj.object_store[c.tree], c.commit_time
         ):
             outstream.write(chunk)
 
 
 def update_server_info(repo="."):
     """Update server info files for a repository.
 
     Args:
       repo: path to the repository
     """
     with open_repo_closing(repo) as r:
         server_update_server_info(r)
 
 
 def symbolic_ref(repo, ref_name, force=False):
     """Set git symbolic ref into HEAD.
 
     Args:
       repo: path to the repository
       ref_name: short name of the new ref
       force: force settings without checking if it exists in refs/heads
     """
     with open_repo_closing(repo) as repo_obj:
         ref_path = _make_branch_ref(ref_name)
         if not force and ref_path not in repo_obj.refs.keys():
             raise Error("fatal: ref `%s` is not a ref" % ref_name)
         repo_obj.refs.set_symbolic_ref(b"HEAD", ref_path)
 
 
 def commit(
     repo=".",
     message=None,
     author=None,
     committer=None,
     encoding=None,
     no_verify=False,
 ):
     """Create a new commit.
 
     Args:
       repo: Path to repository
       message: Optional commit message
       author: Optional author name and email
       committer: Optional committer name and email
       no_verify: Skip pre-commit and commit-msg hooks
     Returns: SHA1 of the new commit
     """
     # FIXME: Support --all argument
     # FIXME: Support --signoff argument
     if getattr(message, "encode", None):
         message = message.encode(encoding or DEFAULT_ENCODING)
     if getattr(author, "encode", None):
         author = author.encode(encoding or DEFAULT_ENCODING)
     if getattr(committer, "encode", None):
         committer = committer.encode(encoding or DEFAULT_ENCODING)
     with open_repo_closing(repo) as r:
         return r.do_commit(
             message=message,
             author=author,
             committer=committer,
             encoding=encoding,
             no_verify=no_verify,
         )
 
 
 def commit_tree(repo, tree, message=None, author=None, committer=None):
     """Create a new commit object.
 
     Args:
       repo: Path to repository
       tree: An existing tree object
       author: Optional author name and email
       committer: Optional committer name and email
     """
     with open_repo_closing(repo) as r:
         return r.do_commit(
             message=message, tree=tree, committer=committer, author=author
         )
 
 
 def init(path=".", bare=False):
     """Create a new git repository.
 
     Args:
       path: Path to repository.
       bare: Whether to create a bare repository.
     Returns: A Repo instance
     """
     if not os.path.exists(path):
         os.mkdir(path)
 
     if bare:
         return Repo.init_bare(path)
     else:
         return Repo.init(path)
 
 
 def clone(
     source,
     target=None,
     bare=False,
     checkout=None,
     errstream=default_bytes_err_stream,
     outstream=None,
     origin=b"origin",
     depth=None,
     branch=None,
     **kwargs
 ):
     """Clone a local or remote git repository.
 
     Args:
       source: Path or URL for source repository
       target: Path to target repository (optional)
       bare: Whether or not to create a bare repository
       checkout: Whether or not to check-out HEAD after cloning
       errstream: Optional stream to write progress to
       outstream: Optional stream to write progress to (deprecated)
       origin: Name of remote from the repository used to clone
       depth: Depth to fetch at
       branch: Optional branch or tag to be used as HEAD in the new repository
         instead of the cloned repository's HEAD.
     Returns: The new repository
     """
     if outstream is not None:
         import warnings
 
         warnings.warn(
             "outstream= has been deprecated in favour of errstream=.",
             DeprecationWarning,
             stacklevel=3,
         )
         errstream = outstream
 
     if checkout is None:
         checkout = not bare
     if checkout and bare:
         raise Error("checkout and bare are incompatible")
 
     if target is None:
         target = source.split("/")[-1]
 
     if not os.path.exists(target):
         os.mkdir(target)
 
     if not isinstance(source, bytes):
         source = source.encode(DEFAULT_ENCODING)
 
     def clone_refs(target_repo, ref_message):
         fetch_result = fetch(
             target_repo,
             origin,
             errstream=errstream,
             message=ref_message,
             depth=depth,
             **kwargs
         )
         head_ref = fetch_result.symrefs.get(b"HEAD", None)
         try:
             head_sha = target_repo[fetch_result.refs[b"HEAD"]].id
         except KeyError:
             head_sha = None
         return head_ref, head_sha
 
     try:
-        return Repo.do_clone(
+        return do_clone(
             source,
             target,
             clone_refs=clone_refs,
             mkdir=False,
             bare=bare,
             origin=origin,
             checkout=checkout,
             errstream=errstream,
             branch=branch,
         )
     except BaseException:
         shutil.rmtree(target)
         raise
 
 
 def add(repo=".", paths=None):
     """Add files to the staging area.
 
     Args:
       repo: Repository for the files
       paths: Paths to add.  No value passed stages all modified files.
     Returns: Tuple with set of added files and ignored files
 
     If the repository contains ignored directories, the returned set will
     contain the path to an ignored directory (with trailing slash). Individual
     files within ignored directories will not be returned.
     """
     ignored = set()
     with open_repo_closing(repo) as r:
         repo_path = Path(r.path).resolve()
         ignore_manager = IgnoreFilterManager.from_repo(r)
         if not paths:
             paths = list(
                 get_untracked_paths(
                     str(Path(os.getcwd()).resolve()),
                     str(repo_path),
                     r.open_index(),
                 )
             )
         relpaths = []
         if not isinstance(paths, list):
             paths = [paths]
         for p in paths:
             path = Path(p)
             relpath = str(path.resolve().relative_to(repo_path))
             # FIXME: Support patterns
             if path.is_dir():
                 relpath = os.path.join(relpath, "")
             if ignore_manager.is_ignored(relpath):
                 ignored.add(relpath)
                 continue
             relpaths.append(relpath)
         r.stage(relpaths)
     return (relpaths, ignored)
 
 
 def _is_subdir(subdir, parentdir):
     """Check whether subdir is parentdir or a subdir of parentdir
 
     If parentdir or subdir is a relative path, it will be disamgibuated
     relative to the pwd.
     """
     parentdir_abs = os.path.realpath(parentdir) + os.path.sep
     subdir_abs = os.path.realpath(subdir) + os.path.sep
     return subdir_abs.startswith(parentdir_abs)
 
 
 # TODO: option to remove ignored files also, in line with `git clean -fdx`
 def clean(repo=".", target_dir=None):
     """Remove any untracked files from the target directory recursively
 
     Equivalent to running `git clean -fd` in target_dir.
 
     Args:
       repo: Repository where the files may be tracked
       target_dir: Directory to clean - current directory if None
     """
     if target_dir is None:
         target_dir = os.getcwd()
 
     with open_repo_closing(repo) as r:
         if not _is_subdir(target_dir, r.path):
             raise Error("target_dir must be in the repo's working dir")
 
         config = r.get_config_stack()
         require_force = config.get_boolean(  # noqa: F841
             (b"clean",), b"requireForce", True
         )
 
         # TODO(jelmer): if require_force is set, then make sure that -f, -i or
         # -n is specified.
 
         index = r.open_index()
         ignore_manager = IgnoreFilterManager.from_repo(r)
 
         paths_in_wd = _walk_working_dir_paths(target_dir, r.path)
         # Reverse file visit order, so that files and subdirectories are
         # removed before containing directory
         for ap, is_dir in reversed(list(paths_in_wd)):
             if is_dir:
                 # All subdirectories and files have been removed if untracked,
                 # so dir contains no tracked files iff it is empty.
                 is_empty = len(os.listdir(ap)) == 0
                 if is_empty:
                     os.rmdir(ap)
             else:
                 ip = path_to_tree_path(r.path, ap)
                 is_tracked = ip in index
 
                 rp = os.path.relpath(ap, r.path)
                 is_ignored = ignore_manager.is_ignored(rp)
 
                 if not is_tracked and not is_ignored:
                     os.remove(ap)
 
 
 def remove(repo=".", paths=None, cached=False):
     """Remove files from the staging area.
 
     Args:
       repo: Repository for the files
       paths: Paths to remove
     """
     with open_repo_closing(repo) as r:
         index = r.open_index()
         for p in paths:
             full_path = os.fsencode(os.path.abspath(p))
             tree_path = path_to_tree_path(r.path, p)
             try:
                 index_sha = index[tree_path].sha
             except KeyError:
                 raise Error("%s did not match any files" % p)
 
             if not cached:
                 try:
                     st = os.lstat(full_path)
                 except OSError:
                     pass
                 else:
                     try:
                         blob = blob_from_path_and_stat(full_path, st)
                     except IOError:
                         pass
                     else:
                         try:
                             committed_sha = tree_lookup_path(
                                 r.__getitem__, r[r.head()].tree, tree_path
                             )[1]
                         except KeyError:
                             committed_sha = None
 
                         if blob.id != index_sha and index_sha != committed_sha:
                             raise Error(
                                 "file has staged content differing "
                                 "from both the file and head: %s" % p
                             )
 
                         if index_sha != committed_sha:
                             raise Error("file has staged changes: %s" % p)
                         os.remove(full_path)
             del index[tree_path]
         index.write()
 
 
 rm = remove
 
 
 def commit_decode(commit, contents, default_encoding=DEFAULT_ENCODING):
     if commit.encoding:
         encoding = commit.encoding.decode("ascii")
     else:
         encoding = default_encoding
     return contents.decode(encoding, "replace")
 
 
 def commit_encode(commit, contents, default_encoding=DEFAULT_ENCODING):
     if commit.encoding:
         encoding = commit.encoding.decode("ascii")
     else:
         encoding = default_encoding
     return contents.encode(encoding)
 
 
 def print_commit(commit, decode, outstream=sys.stdout):
     """Write a human-readable commit log entry.
 
     Args:
       commit: A `Commit` object
       outstream: A stream file to write to
     """
     outstream.write("-" * 50 + "\n")
     outstream.write("commit: " + commit.id.decode("ascii") + "\n")
     if len(commit.parents) > 1:
         outstream.write(
             "merge: "
             + "...".join([c.decode("ascii") for c in commit.parents[1:]])
             + "\n"
         )
     outstream.write("Author: " + decode(commit.author) + "\n")
     if commit.author != commit.committer:
         outstream.write("Committer: " + decode(commit.committer) + "\n")
 
     time_tuple = time.gmtime(commit.author_time + commit.author_timezone)
     time_str = time.strftime("%a %b %d %Y %H:%M:%S", time_tuple)
     timezone_str = format_timezone(commit.author_timezone).decode("ascii")
     outstream.write("Date:   " + time_str + " " + timezone_str + "\n")
     outstream.write("\n")
     outstream.write(decode(commit.message) + "\n")
     outstream.write("\n")
 
 
 def print_tag(tag, decode, outstream=sys.stdout):
     """Write a human-readable tag.
 
     Args:
       tag: A `Tag` object
       decode: Function for decoding bytes to unicode string
       outstream: A stream to write to
     """
     outstream.write("Tagger: " + decode(tag.tagger) + "\n")
     time_tuple = time.gmtime(tag.tag_time + tag.tag_timezone)
     time_str = time.strftime("%a %b %d %Y %H:%M:%S", time_tuple)
     timezone_str = format_timezone(tag.tag_timezone).decode("ascii")
     outstream.write("Date:   " + time_str + " " + timezone_str + "\n")
     outstream.write("\n")
     outstream.write(decode(tag.message))
     outstream.write("\n")
 
 
 def show_blob(repo, blob, decode, outstream=sys.stdout):
     """Write a blob to a stream.
 
     Args:
       repo: A `Repo` object
       blob: A `Blob` object
       decode: Function for decoding bytes to unicode string
       outstream: A stream file to write to
     """
     outstream.write(decode(blob.data))
 
 
 def show_commit(repo, commit, decode, outstream=sys.stdout):
     """Show a commit to a stream.
 
     Args:
       repo: A `Repo` object
       commit: A `Commit` object
       decode: Function for decoding bytes to unicode string
       outstream: Stream to write to
     """
     print_commit(commit, decode=decode, outstream=outstream)
     if commit.parents:
         parent_commit = repo[commit.parents[0]]
         base_tree = parent_commit.tree
     else:
         base_tree = None
     diffstream = BytesIO()
     write_tree_diff(diffstream, repo.object_store, base_tree, commit.tree)
     diffstream.seek(0)
     outstream.write(commit_decode(commit, diffstream.getvalue()))
 
 
 def show_tree(repo, tree, decode, outstream=sys.stdout):
     """Print a tree to a stream.
 
     Args:
       repo: A `Repo` object
       tree: A `Tree` object
       decode: Function for decoding bytes to unicode string
       outstream: Stream to write to
     """
     for n in tree:
         outstream.write(decode(n) + "\n")
 
 
 def show_tag(repo, tag, decode, outstream=sys.stdout):
     """Print a tag to a stream.
 
     Args:
       repo: A `Repo` object
       tag: A `Tag` object
       decode: Function for decoding bytes to unicode string
       outstream: Stream to write to
     """
     print_tag(tag, decode, outstream)
     show_object(repo, repo[tag.object[1]], decode, outstream)
 
 
 def show_object(repo, obj, decode, outstream):
     return {
         b"tree": show_tree,
         b"blob": show_blob,
         b"commit": show_commit,
         b"tag": show_tag,
     }[obj.type_name](repo, obj, decode, outstream)
 
 
 def print_name_status(changes):
     """Print a simple status summary, listing changed files."""
     for change in changes:
         if not change:
             continue
         if isinstance(change, list):
             change = change[0]
         if change.type == CHANGE_ADD:
             path1 = change.new.path
             path2 = ""
             kind = "A"
         elif change.type == CHANGE_DELETE:
             path1 = change.old.path
             path2 = ""
             kind = "D"
         elif change.type == CHANGE_MODIFY:
             path1 = change.new.path
             path2 = ""
             kind = "M"
         elif change.type in RENAME_CHANGE_TYPES:
             path1 = change.old.path
             path2 = change.new.path
             if change.type == CHANGE_RENAME:
                 kind = "R"
             elif change.type == CHANGE_COPY:
                 kind = "C"
         yield "%-8s%-20s%-20s" % (kind, path1, path2)
 
 
 def log(
     repo=".",
     paths=None,
     outstream=sys.stdout,
     max_entries=None,
     reverse=False,
     name_status=False,
 ):
     """Write commit logs.
 
     Args:
       repo: Path to repository
       paths: Optional set of specific paths to print entries for
       outstream: Stream to write log output to
       reverse: Reverse order in which entries are printed
       name_status: Print name status
       max_entries: Optional maximum number of entries to display
     """
     with open_repo_closing(repo) as r:
         walker = r.get_walker(max_entries=max_entries, paths=paths, reverse=reverse)
         for entry in walker:
 
             def decode(x):
                 return commit_decode(entry.commit, x)
 
             print_commit(entry.commit, decode, outstream)
             if name_status:
                 outstream.writelines(
                     [line + "\n" for line in print_name_status(entry.changes())]
                 )
 
 
 # TODO(jelmer): better default for encoding?
 def show(
     repo=".",
     objects=None,
     outstream=sys.stdout,
     default_encoding=DEFAULT_ENCODING,
 ):
     """Print the changes in a commit.
 
     Args:
       repo: Path to repository
       objects: Objects to show (defaults to [HEAD])
       outstream: Stream to write to
       default_encoding: Default encoding to use if none is set in the
         commit
     """
     if objects is None:
         objects = ["HEAD"]
     if not isinstance(objects, list):
         objects = [objects]
     with open_repo_closing(repo) as r:
         for objectish in objects:
             o = parse_object(r, objectish)
             if isinstance(o, Commit):
 
                 def decode(x):
                     return commit_decode(o, x, default_encoding)
 
             else:
 
                 def decode(x):
                     return x.decode(default_encoding)
 
             show_object(r, o, decode, outstream)
 
 
 def diff_tree(repo, old_tree, new_tree, outstream=sys.stdout):
     """Write the diff between two tree objects to a stream.
 
     Args:
       repo: Path to repository
       old_tree: Id of old tree
       new_tree: Id of new tree
       outstream: Stream to write to
     """
     with open_repo_closing(repo) as r:
         store = r.object_store
         write_tree_diff(outstream, store, old_tree, new_tree)
 
 
 def rev_list(repo, commits, outstream=sys.stdout):
     """Write the ids of the commits walked from the given starting points.
 
     One id is written per line, as bytes, in the order the repository
     walker yields them.
 
     Args:
       repo: Path to repository
       commits: Commits over which to iterate
       outstream: Stream to write to
     """
     with open_repo_closing(repo) as r:
         start_ids = [r[c].id for c in commits]
         for entry in r.get_walker(include=start_ids):
             outstream.write(entry.commit.id + b"\n")
 
 
 def tag(*args, **kwargs):
     """Deprecated alias for tag_create.
 
     Emits a DeprecationWarning and forwards all arguments unchanged.
     """
     import warnings
 
     # stacklevel=2 attributes the warning to the caller of tag() rather
     # than to this wrapper, so users can find the code that needs updating.
     warnings.warn(
         "tag has been deprecated in favour of tag_create.",
         DeprecationWarning,
         stacklevel=2,
     )
     return tag_create(*args, **kwargs)
 
 
 def tag_create(
     repo,
     tag,
     author=None,
     message=None,
     annotated=False,
     objectish="HEAD",
     tag_time=None,
     tag_timezone=None,
     sign=False,
 ):
     """Creates a tag in git via dulwich calls:
 
     Args:
       repo: Path to repository
       tag: tag string
       author: tag author (optional, if annotated is set)
       message: tag message (optional; bytes or str, None means empty)
       annotated: whether to create an annotated tag
       objectish: object the tag should point at, defaults to HEAD
       tag_time: Optional time for annotated tag
       tag_timezone: Optional timezone for annotated tag
       sign: GPG Sign the tag (bool, defaults to False,
         pass True to use default GPG key,
         pass a str containing Key ID to use a specific GPG key)
     """
     with open_repo_closing(repo) as r:
         target = parse_object(r, objectish)
         if annotated:
             # Create the tag object
             tag_obj = Tag()
             if author is None:
                 # TODO(jelmer): Don't use repo private method.
                 author = r._get_user_identity(r.get_config_stack())
             tag_obj.tagger = author
             # The message is documented as optional, so a missing message
             # must not crash; text messages are encoded before appending
             # the trailing newline.
             if message is None:
                 message = b""
             elif isinstance(message, str):
                 message = message.encode(DEFAULT_ENCODING)
             tag_obj.message = message + b"\n"
             tag_obj.name = tag
             tag_obj.object = (type(target), target.id)
             if tag_time is None:
                 tag_time = int(time.time())
             tag_obj.tag_time = tag_time
             if tag_timezone is None:
                 # TODO(jelmer) Use current user timezone rather than UTC
                 tag_timezone = 0
             elif isinstance(tag_timezone, str):
                 tag_timezone = parse_timezone(tag_timezone)
             tag_obj.tag_timezone = tag_timezone
             if sign:
                 # A str value selects a specific key; True uses the default.
                 tag_obj.sign(sign if isinstance(sign, str) else None)
             r.object_store.add_object(tag_obj)
             tag_id = tag_obj.id
         else:
             # Lightweight tag: the ref points straight at the object.
             tag_id = target.id
         r.refs[_make_tag_ref(tag)] = tag_id
 
 
 def list_tags(*args, **kwargs):
     """Deprecated alias for tag_list.
 
     Emits a DeprecationWarning and forwards all arguments unchanged.
     """
     import warnings
 
     # stacklevel=2 attributes the warning to the caller of list_tags()
     # rather than to this wrapper.
     warnings.warn(
         "list_tags has been deprecated in favour of tag_list.",
         DeprecationWarning,
         stacklevel=2,
     )
     return tag_list(*args, **kwargs)
 
 
 def tag_list(repo, outstream=sys.stdout):
     """Return the sorted names of all refs found under refs/tags.
 
     Args:
       repo: Path to repository
       outstream: Stream to write tags to (accepted but not referenced
         by this function body)
     Returns: sorted list of the keys of the refs/tags namespace
     """
     with open_repo_closing(repo) as r:
         tag_refs = r.refs.as_dict(b"refs/tags")
         return sorted(tag_refs)
 
 
 def tag_delete(repo, name):
     """Delete one or several tags.
 
     Args:
       repo: Path to repository
       name: Name of the tag to remove (bytes), or a list of names
     Raises:
       Error: if name is neither bytes nor a list
     """
     with open_repo_closing(repo) as r:
         if not isinstance(name, (bytes, list)):
             raise Error("Unexpected tag name type %r" % name)
         doomed = [name] if isinstance(name, bytes) else name
         for tag_name in doomed:
             del r.refs[_make_tag_ref(tag_name)]
 
 
 def reset(repo, mode, treeish="HEAD"):
     """Reset the index to the tree of the given treeish.
 
     Args:
       repo: Path to repository
       mode: Mode ("hard", "soft", "mixed"); only "hard" is accepted
       treeish: Treeish to reset to
     Raises:
       Error: if mode is anything other than "hard"
     """
     # Reject unsupported modes before the repository is even opened.
     if mode != "hard":
         raise Error("hard is the only mode currently supported")
     with open_repo_closing(repo) as r:
         target_tree = parse_tree(r, treeish)
         r.reset_index(target_tree.id)
 
 
 def get_remote_repo(
     repo: Repo, remote_location: Optional[Union[str, bytes]] = None
 ) -> Tuple[Optional[str], str]:
     """Resolve a remote name or location into (remote name, location).
 
     When no location is given, the remote of the active branch is used.
     If the repository config has a matching "remote" section, the name
     is returned together with that section's url; otherwise the name is
     None and the location is returned decoded but otherwise unchanged.
     """
     config = repo.get_config()
     if remote_location is None:
         location = get_branch_remote(repo)
     else:
         location = remote_location
     encoded = location.encode() if isinstance(location, str) else location
     section = (b"remote", encoded)
     if not config.has_section(section):
         # Not a configured remote: treat the value as the location itself.
         return (None, encoded.decode())
     remote_name = encoded.decode()
     url = config.get(section, "url")
     return (remote_name, url.decode())
 
 
 def push(
     repo,
     remote_location=None,
     refspecs=None,
     outstream=default_bytes_out_stream,
     errstream=default_bytes_err_stream,
     force=False,
     **kwargs
 ):
     """Remote push with dulwich via dulwich.client
 
     Args:
       repo: Path to repository
       remote_location: Location of the remote
       refspecs: Refs to push to remote (defaults to the active branch)
       outstream: A stream file to write output (not referenced by this
         function body)
       errstream: A stream file to write errors; progress and per-ref
         status lines are written to it as bytes
       force: Force overwriting refs
       **kwargs: Forwarded to get_transport_and_path
     Raises:
       Error: if a selected local ref does not exist, or if the client
         raises SendPackError
     """
 
     # Open the repo
     with open_repo_closing(repo) as r:
         if refspecs is None:
             refspecs = [active_branch(r)]
         (remote_name, remote_location) = get_remote_repo(r, remote_location)
 
         # Get the client and path
         client, path = get_transport_and_path(
             remote_location, config=r.get_config_stack(), **kwargs
         )
 
         # Filled in by update_refs so the results are still available
         # after send_pack has returned.
         selected_refs = []
         remote_changed_refs = {}
 
         def update_refs(refs):
             # Callback handed to send_pack: receives the remote refs and
             # returns the mapping of remote ref -> sha to set.
             selected_refs.extend(parse_reftuples(r.refs, refs, refspecs, force=force))
             new_refs = {}
             # TODO: Handle selected_refs == {None: None}
             for (lh, rh, force_ref) in selected_refs:
                 if lh is None:
                     # No local side: the remote ref is set to ZERO_SHA.
                     new_refs[rh] = ZERO_SHA
                     remote_changed_refs[rh] = None
                 else:
                     try:
                         localsha = r.refs[lh]
                     except KeyError:
                         raise Error("No valid ref %s in local repository" % lh)
                     # Only check divergence for refs that already exist
                     # remotely and are not being forced.
                     if not force_ref and rh in refs:
                         check_diverged(r, refs[rh], localsha)
                     new_refs[rh] = localsha
                     remote_changed_refs[rh] = localsha
             return new_refs
 
         # Status lines are bytes; use the stream's encoding when it has one.
         err_encoding = getattr(errstream, "encoding", None) or DEFAULT_ENCODING
         # From here on remote_location is the URL reported by the client.
         remote_location = client.get_url(path)
         try:
             result = client.send_pack(
                 path,
                 update_refs,
                 generate_pack_data=r.generate_pack_data,
                 progress=errstream.write,
             )
         except SendPackError as e:
             raise Error(
                 "Push to " + remote_location + " failed -> " + e.args[0].decode(),
                 inner=e,
             )
         else:
             errstream.write(
                 b"Push to " + remote_location.encode(err_encoding) + b" successful.\n"
             )
 
         # Report the per-ref outcome; a None error means the ref was updated.
         for ref, error in (result.ref_status or {}).items():
             if error is not None:
                 errstream.write(
                     b"Push of ref %s failed: %s\n" % (ref, error.encode(err_encoding))
                 )
             else:
                 errstream.write(b"Ref %s updated\n" % ref)
 
         # Only a configured (named) remote gets its refs imported locally.
         if remote_name is not None:
             _import_remote_refs(r.refs, remote_name, remote_changed_refs)
 
 
 def pull(
     repo,
     remote_location=None,
     refspecs=None,
     outstream=default_bytes_out_stream,
     errstream=default_bytes_err_stream,
     fast_forward=True,
     force=False,
     **kwargs
 ):
     """Pull from remote via dulwich.client
 
     Args:
       repo: Path to repository
       remote_location: Location of the remote
       refspecs: refspecs to fetch (defaults to [b"HEAD"])
       outstream: A stream file to write to output (not referenced by
         this function body)
       errstream: A stream file to write to errors; fetch progress is
         written to it
       fast_forward: If true, DivergedBranches is re-raised when the
         local and fetched refs have diverged; if false,
         NotImplementedError is raised instead because merging is not
         supported
       force: Passed to parse_reftuples when selecting refs
       **kwargs: Forwarded to get_transport_and_path
     """
     # Open the repo
     with open_repo_closing(repo) as r:
         (remote_name, remote_location) = get_remote_repo(r, remote_location)
         if refspecs is None:
             refspecs = [b"HEAD"]
         # Filled in by determine_wants so the selection is still available
         # once the fetch has completed.
         selected_refs = []
 
         def determine_wants(remote_refs, **kwargs):
             selected_refs.extend(
                 parse_reftuples(remote_refs, r.refs, refspecs, force=force)
             )
             # Only ask for objects that are not already present locally.
             return [
                 remote_refs[lh]
                 for (lh, rh, force_ref) in selected_refs
                 if remote_refs[lh] not in r.object_store
             ]
 
         client, path = get_transport_and_path(
             remote_location, config=r.get_config_stack(), **kwargs
         )
         fetch_result = client.fetch(
             path, r, progress=errstream.write, determine_wants=determine_wants
         )
         for (lh, rh, force_ref) in selected_refs:
             try:
                 check_diverged(r, r.refs[rh], fetch_result.refs[lh])
             except DivergedBranches:
                 if fast_forward:
                     raise
                 else:
                     raise NotImplementedError("merge is not yet supported")
             r.refs[rh] = fetch_result.refs[lh]
         if selected_refs:
             # fetch_result.refs holds the remote's refs, so it has to be
             # indexed with the remote-side name (lh, element 0) of the
             # first selected ref, not with the local-side name.
             r[b"HEAD"] = fetch_result.refs[selected_refs[0][0]]
 
         # Perform 'git checkout .' - syncs staged changes
         tree = r[b"HEAD"].tree
         r.reset_index(tree=tree)
         if remote_name is not None:
             _import_remote_refs(r.refs, remote_name, fetch_result.refs)
 
 
 def status(repo=".", ignored=False):
     """Collect staged, unstaged and untracked changes of a repository.
 
     Args:
       repo: Path to repository or repository object
       ignored: Whether to include ignored files in `untracked`
     Returns: GitStatus tuple,
         staged -  result of get_tree_changes (diff index/HEAD)
         unstaged -  list of unstaged paths (diff index/working-tree)
         untracked - list of untracked paths
     """
     with open_repo_closing(repo) as r:
         # Staged: index compared against HEAD.
         staged = get_tree_changes(r)
         # Unstaged: working tree compared against the index, run through
         # the repository's check-in normalizer.
         index = r.open_index()
         checkin_filter = r.get_blob_normalizer().checkin_normalize
         unstaged = list(get_unstaged_changes(index, r.path, checkin_filter))
         # Untracked: ignored paths are excluded unless explicitly requested.
         untracked = list(
             get_untracked_paths(r.path, r.path, index, exclude_ignored=not ignored)
         )
         return GitStatus(staged, unstaged, untracked)
 
 
 def _walk_working_dir_paths(frompath, basepath, prune_dirnames=None):
     """Yield (path, is_dir) pairs for the working directory below frompath.
 
     Args:
       frompath: Path to begin walk
       basepath: Path to compare to; a ".git" entry found in any other
         directory causes that directory's own entries to be skipped
       prune_dirnames: Optional callback to prune dirnames during os.walk
         dirnames will be set to result of prune_dirnames(dirpath, dirnames)
     """
     for current, subdirs, files in os.walk(frompath):
         outside_base = current != basepath
         # Never descend into .git; a directory other than basepath that
         # contains one is not reported at all.
         if ".git" in subdirs:
             subdirs.remove(".git")
             if outside_base:
                 continue
         if ".git" in files:
             files.remove(".git")
             if outside_base:
                 continue
         if current != frompath:
             yield current, True
         for fname in files:
             yield os.path.join(current, fname), False
         # Pruning happens after this directory has been reported, so it
         # only affects which subdirectories os.walk visits next.
         if prune_dirnames:
             subdirs[:] = prune_dirnames(current, subdirs)
 
 
 def get_untracked_paths(frompath, basepath, index, exclude_ignored=False):
     """Yield the paths below frompath that are not present in the index.
 
     Args:
       frompath: Path to walk
       basepath: Path to compare to
       index: Index to check against
       exclude_ignored: Whether to exclude ignored paths
 
     Note: ignored directories will never be walked for performance reasons.
       If exclude_ignored is False, only the path to an ignored directory will
       be yielded, no files inside the directory will be returned
     """
     with open_repo_closing(basepath) as r:
         ignore_manager = IgnoreFilterManager.from_repo(r)
     # Ignored directories are collected while walking and yielded last.
     ignored_dirs = []
 
     def prune_dirnames(dirpath, dirnames):
         # Walk the indices backwards so deleting entries is safe.
         for idx in reversed(range(len(dirnames))):
             full = os.path.join(dirpath, dirnames[idx])
             # The trailing separator marks the path as a directory.
             rel_to_base = os.path.join(os.path.relpath(full, basepath), "")
             if not ignore_manager.is_ignored(rel_to_base):
                 continue
             if not exclude_ignored:
                 ignored_dirs.append(
                     os.path.join(os.path.relpath(full, frompath), "")
                 )
             del dirnames[idx]
         return dirnames
 
     walker = _walk_working_dir_paths(
         frompath, basepath, prune_dirnames=prune_dirnames
     )
     for ap, is_dir in walker:
         if is_dir:
             continue
         if path_to_tree_path(basepath, ap) in index:
             continue
         if exclude_ignored and ignore_manager.is_ignored(
             os.path.relpath(ap, basepath)
         ):
             continue
         yield os.path.relpath(ap, frompath)
     yield from ignored_dirs
 
 
 def get_tree_changes(repo):
     """Classify the differences between the index and the HEAD tree.
 
     Args:
       repo: repo path or object
     Returns: dict with "add", "delete" and "modify" lists of paths
     Raises:
       NotImplementedError: if a change has differing old and new paths
     """
     with open_repo_closing(repo) as r:
         index = r.open_index()
         # TODO: call out to dulwich.diff_tree somehow.
         tracked_changes = {"add": [], "delete": [], "modify": []}
         # Without a HEAD the index is compared against no tree at all.
         try:
             tree_id = r[b"HEAD"].tree
         except KeyError:
             tree_id = None
         for change in index.changes_from_tree(r.object_store, tree_id):
             paths = change[0]
             old_path, new_path = paths[0], paths[1]
             if not old_path:
                 tracked_changes["add"].append(new_path)
             elif not new_path:
                 tracked_changes["delete"].append(old_path)
             elif old_path == new_path:
                 tracked_changes["modify"].append(old_path)
             else:
                 raise NotImplementedError("git mv ops not yet supported")
         return tracked_changes
 
 
 def daemon(path=".", address=None, port=None):
     """Serve Git requests over TCP/IP until interrupted.
 
     Args:
       path: Path to the directory to serve.
       address: Optional address to listen on (defaults to ::)
       port: Optional port to listen on (defaults to TCP_GIT_PORT)
     """
     # TODO(jelmer): Support git-daemon-export-ok and --export-all.
     server = TCPGitServer(FileSystemBackend(path), address, port)
     server.serve_forever()
 
 
 def web_daemon(path=".", address=None, port=None):
     """Serve Git requests over HTTP until interrupted.
 
     Args:
       path: Path to the directory to serve
       address: Optional address to listen on (defaults to ::)
       port: Optional port to listen on (defaults to 80)
     """
     # Imported here so the web machinery is only loaded when needed.
     from dulwich.web import (
         make_wsgi_chain,
         make_server,
         WSGIRequestHandlerLogger,
         WSGIServerLogger,
     )
 
     app = make_wsgi_chain(FileSystemBackend(path))
     server = make_server(
         address,
         port,
         app,
         handler_class=WSGIRequestHandlerLogger,
         server_class=WSGIServerLogger,
     )
     server.serve_forever()
 
 
 def upload_pack(path=".", inf=None, outf=None):
     """Run the upload-pack side of the smart protocol for a repository.
 
     Args:
       path: Path to the repository
       inf: Input stream to communicate with client (defaults to stdin)
       outf: Output stream to communicate with client (defaults to stdout)
     Returns: 0 once the handler has finished
     """
     # Prefer the binary buffer of the standard streams when they have one.
     out_stream = getattr(sys.stdout, "buffer", sys.stdout) if outf is None else outf
     in_stream = getattr(sys.stdin, "buffer", sys.stdin) if inf is None else inf
     repo_path = os.path.expanduser(path)
     backend = FileSystemBackend(repo_path)
 
     def send_fn(data):
         # Flush after every write so the peer sees the data immediately.
         out_stream.write(data)
         out_stream.flush()
 
     handler = UploadPackHandler(
         backend, [repo_path], Protocol(in_stream.read, send_fn)
     )
     # FIXME: Catch exceptions and write a single-line summary to outf.
     handler.handle()
     return 0
 
 
 def receive_pack(path=".", inf=None, outf=None):
     """Run the receive-pack side of the smart protocol for a repository.
 
     Args:
       path: Path to the repository
       inf: Input stream to communicate with client (defaults to stdin)
       outf: Output stream to communicate with client (defaults to stdout)
     Returns: 0 once the handler has finished
     """
     # Prefer the binary buffer of the standard streams when they have one.
     out_stream = getattr(sys.stdout, "buffer", sys.stdout) if outf is None else outf
     in_stream = getattr(sys.stdin, "buffer", sys.stdin) if inf is None else inf
     repo_path = os.path.expanduser(path)
     backend = FileSystemBackend(repo_path)
 
     def send_fn(data):
         # Flush after every write so the peer sees the data immediately.
         out_stream.write(data)
         out_stream.flush()
 
     handler = ReceivePackHandler(
         backend, [repo_path], Protocol(in_stream.read, send_fn)
     )
     # FIXME: Catch exceptions and write a single-line summary to outf.
     handler.handle()
     return 0
 
 
 def _make_branch_ref(name):
     """Return the branch ref for name, encoding it first if it can be."""
     encoder = getattr(name, "encode", None)
     if encoder:
         name = encoder(DEFAULT_ENCODING)
     return LOCAL_BRANCH_PREFIX + name
 
 
 def _make_tag_ref(name):
     """Return the tag ref for name, encoding it first if it can be."""
     encoder = getattr(name, "encode", None)
     if encoder:
         name = encoder(DEFAULT_ENCODING)
     return LOCAL_TAG_PREFIX + name
 
 
 def branch_delete(repo, name):
     """Delete one or several branches.
 
     Args:
       repo: Path to the repository
       name: Name of the branch, or a list of branch names
     """
     with open_repo_closing(repo) as r:
         doomed = name if isinstance(name, list) else [name]
         for branch_name in doomed:
             del r.refs[_make_branch_ref(branch_name)]
 
 
 def branch_create(repo, name, objectish=None, force=False):
     """Create a branch.
 
     Args:
       repo: Path to the repository
       name: Name of the new branch
       objectish: Target object to point new branch at (defaults to HEAD);
         may be given as str or bytes
       force: Force creation of branch, even if it already exists
     Raises:
       Error: if the branch already exists and force is false
     """
     with open_repo_closing(repo) as r:
         if objectish is None:
             objectish = "HEAD"
         target = parse_object(r, objectish)
         refname = _make_branch_ref(name)
         # The reflog message is bytes; only encode when objectish is text
         # so that a bytes objectish does not fail on a missing .encode().
         if isinstance(objectish, bytes):
             source = objectish
         else:
             source = objectish.encode("utf-8")
         ref_message = b"branch: Created from " + source
         if force:
             r.refs.set_if_equals(refname, None, target.id, message=ref_message)
         else:
             if not r.refs.add_if_new(refname, target.id, message=ref_message):
                 raise Error("Branch with name %s already exists." % name)
 
 
 def branch_list(repo):
     """Return the keys of all refs below the local branch prefix.
 
     Args:
       repo: Path to the repository
     """
     with open_repo_closing(repo) as r:
         branch_names = r.refs.keys(base=LOCAL_BRANCH_PREFIX)
         return branch_names
 
 
 def active_branch(repo):
     """Return the name of the branch that HEAD currently points at.
 
     Args:
       repo: Repository to open
     Returns:
       branch name, i.e. the followed ref without the local branch prefix
     Raises:
       KeyError: if the repository does not have a working tree
       IndexError: if HEAD is floating
       ValueError: if the ref HEAD resolves to is not a local branch
     """
     with open_repo_closing(repo) as r:
         followed = r.refs.follow(b"HEAD")
         active_ref = followed[0][1]
         if active_ref.startswith(LOCAL_BRANCH_PREFIX):
             return active_ref[len(LOCAL_BRANCH_PREFIX) :]
         raise ValueError(active_ref)
 
 
 def get_branch_remote(repo):
     """Return the active branch's remote name, if any.
 
     Args:
       repo: Repository to open
     Returns:
       remote name configured for the active branch, or b"origin" when
       the branch has no "remote" setting
     Raises:
       KeyError: if the repository does not have a working tree
     """
     with open_repo_closing(repo) as r:
         # Reuse the already opened repository instead of re-opening it by
         # path; this also works for repositories without a path attribute.
         branch_name = active_branch(r)
         config = r.get_config()
         try:
             remote_name = config.get((b"branch", branch_name), b"remote")
         except KeyError:
             remote_name = b"origin"
     return remote_name
 
 
 def _import_remote_refs(
     refs_container: RefsContainer,
     remote_name: str,
     refs: Dict[str, str],
     message: Optional[bytes] = None,
     prune: bool = False,
     prune_tags: bool = False,
 ):
     """Import a remote's branches and tags into a refs container.
 
     Branches are imported below refs/remotes/<remote_name> and tags
     below refs/tags. Peeled refs are stripped first, and tag names
     ending in ANNOTATED_TAG_SUFFIX are skipped.
     """
     stripped_refs = strip_peeled_refs(refs)
     branch_prefix_len = len(LOCAL_BRANCH_PREFIX)
     branches = {}
     for ref_name, sha in stripped_refs.items():
         if ref_name.startswith(LOCAL_BRANCH_PREFIX):
             branches[ref_name[branch_prefix_len:]] = sha
     remote_base = b"refs/remotes/" + remote_name.encode()
     refs_container.import_refs(remote_base, branches, message=message, prune=prune)
     tag_prefix = b"refs/tags/"
     tags = {}
     for ref_name, sha in stripped_refs.items():
         if not ref_name.startswith(tag_prefix):
             continue
         if ref_name.endswith(ANNOTATED_TAG_SUFFIX):
             continue
         tags[ref_name[len(tag_prefix) :]] = sha
     refs_container.import_refs(b"refs/tags", tags, message=message, prune=prune_tags)
 
 
 def fetch(
     repo,
     remote_location=None,
     outstream=sys.stdout,
     errstream=default_bytes_err_stream,
     message=None,
     depth=None,
     prune=False,
     prune_tags=False,
     force=False,
     **kwargs
 ):
     """Fetch objects from a remote server.
 
     Args:
       repo: Path to the repository
       remote_location: String identifying a remote server
       outstream: Output stream (not referenced by this function body)
       errstream: Error stream; fetch progress is written to it
       message: Reflog message (defaults to b"fetch: from " followed by
         the encoded remote location)
       depth: Depth to fetch at
       prune: Prune remote removed refs
       prune_tags: Prune remote removed tags
       force: Accepted but not referenced by this function body
       **kwargs: Forwarded to get_transport_and_path
     Returns:
       The result object of client.fetch; its refs attribute holds the
       refs that are imported for a named remote
     """
     with open_repo_closing(repo) as r:
         (remote_name, remote_location) = get_remote_repo(r, remote_location)
         if message is None:
             message = b"fetch: from " + remote_location.encode("utf-8")
         client, path = get_transport_and_path(
             remote_location, config=r.get_config_stack(), **kwargs
         )
         fetch_result = client.fetch(path, r, progress=errstream.write, depth=depth)
         # Only a configured (named) remote gets its refs imported locally.
         if remote_name is None:
             return fetch_result
         _import_remote_refs(
             r.refs,
             remote_name,
             fetch_result.refs,
             message,
             prune=prune,
             prune_tags=prune_tags,
         )
     return fetch_result
 
 
 def ls_remote(remote, config=None, **kwargs):
     """Return the refs advertised by a remote.
 
     Args:
       remote: Remote repository location
       config: Configuration to use (defaults to StackedConfig.default())
       **kwargs: Forwarded to get_transport_and_path
     Returns:
       Dictionary with remote refs
     """
     effective_config = StackedConfig.default() if config is None else config
     client, host_path = get_transport_and_path(
         remote, config=effective_config, **kwargs
     )
     return client.get_refs(host_path)
 
 
 def repack(repo):
     """Pack the loose objects of a repository.
 
     Currently this only packs loose objects.
 
     Args:
       repo: Path to the repository
     """
     with open_repo_closing(repo) as r:
         store = r.object_store
         store.pack_loose_objects()
 
 
 def pack_objects(repo, object_ids, packf, idxf, delta_window_size=None):
     """Write the given objects to a pack file and, optionally, its index.
 
     Args:
       repo: Path to the repository
       object_ids: List of object ids to write
       packf: File-like object to write to
       idxf: File-like object to write to (can be None)
       delta_window_size: Passed through to write_pack_objects
     """
     with open_repo_closing(repo) as r:
         wanted = ((oid, None) for oid in object_ids)
         entries, data_sum = write_pack_objects(
             packf,
             r.object_store.iter_shas(wanted),
             delta_window_size=delta_window_size,
         )
     if idxf is None:
         return
     # The index is written from the sorted (key, v[0], v[1]) triples.
     index_entries = sorted((k, v[0], v[1]) for (k, v) in entries.items())
     write_pack_index(idxf, index_entries, data_sum)
 
 
 def ls_tree(
     repo,
     treeish=b"HEAD",
     outstream=sys.stdout,
     recursive=False,
     name_only=False,
 ):
     """Write the entries of a tree to a stream.
 
     Args:
       repo: Path to the repository
       treeish: Tree id to list
       outstream: Output stream (defaults to stdout)
       recursive: Whether to recursively list files
       name_only: Only print item name
     """
 
     def list_tree(store, treeid, base):
         for (entry_name, mode, sha) in store[treeid].iteritems():
             # Entries of nested trees are reported with their full path.
             full_name = posixpath.join(base, entry_name) if base else entry_name
             if name_only:
                 outstream.write(full_name + b"\n")
             else:
                 outstream.write(pretty_format_tree_entry(full_name, mode, sha))
             if stat.S_ISDIR(mode) and recursive:
                 list_tree(store, sha, full_name)
 
     with open_repo_closing(repo) as r:
         root = parse_tree(r, treeish)
         list_tree(r.object_store, root.id, "")
 
 
 def remote_add(repo, name, url):
     """Register a new remote in the repository configuration.
 
     Args:
       repo: Path to the repository
       name: Remote name
       url: Remote URL
     Raises:
       RemoteExists: if a remote section with that name already exists
     """
     # The config is keyed by bytes; encode text arguments up front.
     name_bytes = name if isinstance(name, bytes) else name.encode(DEFAULT_ENCODING)
     url_bytes = url if isinstance(url, bytes) else url.encode(DEFAULT_ENCODING)
     with open_repo_closing(repo) as r:
         config = r.get_config()
         section = (b"remote", name_bytes)
         if config.has_section(section):
             raise RemoteExists(section)
         config.set(section, b"url", url_bytes)
         config.write_to_path()
 
 
 def check_ignore(repo, paths, no_index=False):
     """Yield those of the given paths that are ignored.
 
     Args:
       repo: Path to the repository
       paths: List of paths to check for
       no_index: Don't check index
     Returns: Iterator over the ignored paths; absolute paths are
       reported relative to the repository path
     """
     with open_repo_closing(repo) as r:
         index = r.open_index()
         ignore_manager = IgnoreFilterManager.from_repo(r)
         consult_index = not no_index
         for path in paths:
             # Paths already present in the index are skipped.
             if consult_index and path_to_tree_path(r.path, path) in index:
                 continue
             candidate = os.path.relpath(path, r.path) if os.path.isabs(path) else path
             if ignore_manager.is_ignored(candidate):
                 yield candidate
 
 
 def update_head(repo, target, detached=False, new_branch=None):
     """Point HEAD (or a new branch) at another branch or commit.
 
     Note that this does not actually update the working tree.
 
     Args:
       repo: Path to the repository
       target: Branch or committish to switch to
       detached: Create a detached head
       new_branch: New branch to create
     """
     with open_repo_closing(repo) as r:
         to_set = b"HEAD" if new_branch is None else _make_branch_ref(new_branch)
         if detached:
             # TODO(jelmer): Provide some way so that the actual ref gets
             # updated rather than what it points to, so the delete isn't
             # necessary.
             del r.refs[to_set]
             r.refs[to_set] = parse_commit(r, target).id
         else:
             r.refs.set_symbolic_ref(to_set, parse_ref(r, target))
         # With a new branch, HEAD becomes a symbolic ref to that branch.
         if new_branch is not None:
             r.refs.set_symbolic_ref(b"HEAD", to_set)
 
 
 def reset_file(repo, file_path: str, target: bytes = b'HEAD'):
     """Rewrite a working-tree file from the blob stored in a tree.
 
     Args:
       repo: dulwich Repo object
       file_path: file to reset, relative to the repository path
       target: branch or commit or b'HEAD' to reset
     """
     tree = parse_tree(repo, treeish=target)
     tree_path = _fs_to_tree_path(file_path)
     entry = tree.lookup_path(repo.object_store.__getitem__, tree_path)
     destination = os.path.join(repo.path.encode(), tree_path)
     # The looked-up entry carries the mode first and the blob id second.
     mode, blob_id = entry[0], entry[1]
     build_file_from_blob(repo.object_store[blob_id], mode, destination)
 
 
 def check_mailmap(repo, contact):
     """Look up a contact in the repository's .mailmap file.
 
     Args:
       repo: Path to the repository
       contact: Contact name and/or email
     Returns: Canonical contact data
     """
     with open_repo_closing(repo) as r:
         from dulwich.mailmap import Mailmap
 
         mailmap_path = os.path.join(r.path, ".mailmap")
         try:
             mailmap = Mailmap.from_path(mailmap_path)
         except FileNotFoundError:
             # No .mailmap file: fall back to an empty mailmap.
             mailmap = Mailmap()
         return mailmap.lookup(contact)
 
 
 def fsck(repo):
     """Run check() on every object in a repository's object store.
 
     Args:
       repo: A path to the repository
     Returns: Iterator over (sha, exception) pairs for failing objects
     """
     with open_repo_closing(repo) as r:
         # TODO(jelmer): check pack files
         # TODO(jelmer): check graph
         # TODO(jelmer): check refs
         store = r.object_store
         for sha in store:
             obj = store[sha]
             try:
                 obj.check()
             except Exception as exc:
                 yield (sha, exc)
 
 
 def stash_list(repo):
     """Return an enumeration of all stashes in a repository."""
     with open_repo_closing(repo) as r:
         from dulwich.stash import Stash
 
         # Materialise the stashes while the repository is still open.
         entries = list(Stash.from_repo(r).stashes())
         return enumerate(entries)
 
 
 def stash_push(repo):
     """Push a new entry onto the repository's stash."""
     with open_repo_closing(repo) as r:
         from dulwich.stash import Stash
 
         Stash.from_repo(r).push()
 
 
 def stash_pop(repo, index):
     """Pop the stash entry at the given index."""
     with open_repo_closing(repo) as r:
         from dulwich.stash import Stash
 
         Stash.from_repo(r).pop(index)
 
 
 def stash_drop(repo, index):
     """Drop the stash entry at the given index."""
     with open_repo_closing(repo) as r:
         from dulwich.stash import Stash
 
         Stash.from_repo(r).drop(index)
 
 
 def ls_files(repo):
     """Return the sorted entries of the repository's index."""
     with open_repo_closing(repo) as r:
         index = r.open_index()
         return sorted(index)
 
 
 def describe(repo):
     """Describe the repository version.
 
     Args:
       repo: git repository root
     Returns: a string description of the current git revision
 
     Examples: "gabcdefh", "v0.1" or "v0.1-5-gabcdefh".
     """
     # Get the repository
     with open_repo_closing(repo) as r:
         # Collect, per tag name, the time and id of the tagged commit.
         refs = r.get_refs()
         tags = {}
         for key, value in refs.items():
             key = key.decode()
             # Filter on the ref name first so that objects of non-tag refs
             # are never loaded.
             if "tags" not in key:
                 continue
             obj = r.get_object(value)
             _, tag = key.rsplit("/", 1)
             try:
                 commit = obj.object
             except AttributeError:
                 # Objects without an `object` attribute are skipped.
                 continue
             else:
                 commit = r.get_object(commit[1])
             tags[tag] = [
                 datetime.datetime(*time.gmtime(commit.commit_time)[:6]),
                 commit.id.decode("ascii"),
             ]
         sorted_tags = sorted(tags.items(), key=lambda tag: tag[1][0], reverse=True)
         # Get the latest commit
         latest_commit = r[r.head()]
         short_id = latest_commit.id.decode("ascii")[:7]
         # If there are no tags, return the current commit
         if not sorted_tags:
             return "g{}".format(short_id)
         # Map each tagged commit id to its newest tag; setdefault keeps the
         # first (most recent) name when several tags share a commit.
         tag_by_commit = {}
         for tag_name, tag_info in sorted_tags:
             tag_by_commit.setdefault(tag_info[1], tag_name)
         # Walk through all commits, counting the distance from the top.
         for commit_count, entry in enumerate(r.get_walker()):
             tag_name = tag_by_commit.get(entry.commit.id.decode("ascii"))
             if tag_name is None:
                 continue
             if commit_count == 0:
                 return tag_name
             return "{}-{}-g{}".format(tag_name, commit_count, short_id)
         # Return plain commit if no parent tag can be found
         return "g{}".format(short_id)
 
 
 def get_object_by_path(repo, path, committish=None):
     """Look up the object stored at a path within a commit's tree.
 
     Args:
       repo: A path to the repository
       path: Path to look up; non-bytes paths are encoded with
         commit_encode for the resolved commit
       committish: Commit to look up path in (defaults to "HEAD")
     Returns: A `ShaFile` object
     """
     wanted = "HEAD" if committish is None else committish
     # Get the repository
     with open_repo_closing(repo) as r:
         commit = parse_commit(r, wanted)
         base_tree = commit.tree
         encoded = path if isinstance(path, bytes) else commit_encode(commit, path)
         lookup = r.object_store.__getitem__
         (_mode, sha) = tree_lookup_path(lookup, base_tree, encoded)
         return r[sha]
 
 
 def write_tree(repo):
     """Commit the index to the object store as a tree.
 
     Args:
       repo: Repository for which to write tree
     Returns: tree id for the tree that was written
     """
     with open_repo_closing(repo) as r:
         index = r.open_index()
         return index.commit(r.object_store)
diff --git a/dulwich/repo.py b/dulwich/repo.py
index 72e359d2..7d783447 100644
--- a/dulwich/repo.py
+++ b/dulwich/repo.py
@@ -1,1824 +1,1700 @@
 # repo.py -- For dealing with git repositories.
 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
 # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
 #
 # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
 # General Public License as public by the Free Software Foundation; version 2.0
 # or (at your option) any later version. You can redistribute it and/or
 # modify it under the terms of either of these two licenses.
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # You should have received a copy of the licenses; if not, see
 # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
 # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
 # License, Version 2.0.
 #
 
 
 """Repository access.
 
 This module contains the base class for git repositories
 (BaseRepo) and an implementation which uses a repository on
 local disk (Repo).
 
 """
 
 from io import BytesIO
 import os
 import sys
 import stat
 import time
 from typing import Optional, Tuple, TYPE_CHECKING, List, Dict, Union, Iterable
 
 if TYPE_CHECKING:
     # There are no circular imports here, but we try to defer imports as long
     # as possible to reduce start-up time for anything that doesn't need
     # these imports.
     from dulwich.config import StackedConfig, ConfigFile
     from dulwich.index import Index
 
+from dulwich.clone import (
+    do_clone,
+)
 from dulwich.errors import (
     NoIndexPresent,
     NotBlobError,
     NotCommitError,
     NotGitRepository,
     NotTreeError,
     NotTagError,
     CommitError,
     RefFormatError,
     HookError,
 )
 from dulwich.file import (
     GitFile,
 )
 from dulwich.object_store import (
     DiskObjectStore,
     MemoryObjectStore,
     BaseObjectStore,
     ObjectStoreGraphWalker,
 )
 from dulwich.objects import (
     check_hexsha,
     valid_hexsha,
     Blob,
     Commit,
     ShaFile,
     Tag,
     Tree,
 )
 from dulwich.pack import (
     pack_objects_to_data,
 )
 
 from dulwich.hooks import (
     Hook,
     PreCommitShellHook,
     PostCommitShellHook,
     CommitMsgShellHook,
     PostReceiveShellHook,
 )
 
 from dulwich.line_ending import BlobNormalizer, TreeBlobNormalizer
 
 from dulwich.refs import (  # noqa: F401
     ANNOTATED_TAG_SUFFIX,
     LOCAL_BRANCH_PREFIX,
     LOCAL_TAG_PREFIX,
     check_ref_format,
     RefsContainer,
     DictRefsContainer,
     InfoRefsContainer,
     DiskRefsContainer,
     read_packed_refs,
     read_packed_refs_with_peeled,
     write_packed_refs,
     SYMREF,
 )
 
 
 import warnings
 
 
 CONTROLDIR = ".git"
 OBJECTDIR = "objects"
 REFSDIR = "refs"
 REFSDIR_TAGS = "tags"
 REFSDIR_HEADS = "heads"
 INDEX_FILENAME = "index"
 COMMONDIR = "commondir"
 GITDIR = "gitdir"
 WORKTREES = "worktrees"
 
 BASE_DIRECTORIES = [
     ["branches"],
     [REFSDIR],
     [REFSDIR, REFSDIR_TAGS],
     [REFSDIR, REFSDIR_HEADS],
     ["hooks"],
     ["info"],
 ]
 
 DEFAULT_REF = b"refs/heads/master"
 
 
 class InvalidUserIdentity(Exception):
     """User identity is not of the format 'user <email>'.
 
     The rejected value is kept on the ``identity`` attribute.
     """
 
     def __init__(self, identity):
         # Keep the offending identity available on the exception instance.
         self.identity = identity
 
 
 def _get_default_identity() -> Tuple[str, str]:
     """Derive a fallback (fullname, email) pair from the host system.
 
     The full name is the first comma-separated part of the gecos field of
     the current login's passwd entry; the login name is used instead when
     the ``pwd`` module or the entry is missing, or when that part is empty.
     The email is ``$EMAIL`` when set, otherwise ``<login>@<hostname>``.
 
     Returns:
       Tuple of (fullname, email)
     """
     import getpass
     import socket
 
     login = getpass.getuser()
     fullname = None
     try:
         import pwd
     except ImportError:
         # No passwd database module on this platform.
         pass
     else:
         try:
             gecos = pwd.getpwnam(login).pw_gecos
         except KeyError:
             pass
         else:
             fullname = gecos.split(",")[0]
 
     address = os.environ.get("EMAIL")
     if address is None:
         address = "{}@{}".format(login, socket.gethostname())
     return (fullname or login, address)
 
 
 def get_user_identity(config: "StackedConfig", kind: Optional[str] = None) -> bytes:
     """Determine the identity to use for new commits.
 
     If kind is set, this first checks
     GIT_${KIND}_NAME and GIT_${KIND}_EMAIL.
 
     If those variables are not set, then it will fall back
     to reading the user.name and user.email settings from
     the specified configuration.
 
     If that also fails, then it will fall back to using
     the current users' identity as obtained from the host
     system (e.g. the gecos field, $EMAIL, $USER@$(hostname -f).
 
     Args:
       config: Configuration consulted for ``user.name`` and
         ``user.email``; only queried for the parts not already supplied
         by the environment
       kind: Optional kind to return identity for,
         usually either "AUTHOR" or "COMMITTER".
 
     Returns:
       A user identity, formatted as ``b"name <email>"``
     """
     user = None  # type: Optional[bytes]
     email = None  # type: Optional[bytes]
     if kind:
         user_uc = os.environ.get("GIT_" + kind + "_NAME")
         if user_uc is not None:
             user = user_uc.encode("utf-8")
         email_uc = os.environ.get("GIT_" + kind + "_EMAIL")
         if email_uc is not None:
             email = email_uc.encode("utf-8")
     if user is None:
         try:
             user = config.get(("user",), "name")
         except KeyError:
             user = None
     if email is None:
         try:
             email = config.get(("user",), "email")
         except KeyError:
             email = None
     if user is None or email is None:
         # Only probe the host (login name, passwd entry, hostname) when a
         # part is really missing; the probe is needless work otherwise and
         # may fail on hosts where the current user cannot be determined.
         default_user, default_email = _get_default_identity()
         if user is None:
             user = default_user.encode("utf-8")
         if email is None:
             email = default_email.encode("utf-8")
     # Accept an email that already carries its angle brackets.
     if email.startswith(b"<") and email.endswith(b">"):
         email = email[1:-1]
     return user + b" <" + email + b">"
 
 
 def check_user_identity(identity):
     """Verify that a user identity is formatted correctly.
 
     An identity must contain ``b" <"`` followed somewhere by ``b">"``, and
     must not contain NUL or newline bytes (an identity is meant to occupy
     a single header line of a commit).
 
     Args:
       identity: User identity bytestring
     Raises:
       InvalidUserIdentity: Raised when identity is invalid
     """
     try:
         _name, remainder = identity.split(b" <", 1)
     except ValueError as exc:
         # No " <" separator at all; keep the cause for debugging.
         raise InvalidUserIdentity(identity) from exc
     if b">" not in remainder:
         raise InvalidUserIdentity(identity)
     if b"\0" in identity or b"\n" in identity:
         raise InvalidUserIdentity(identity)
 
 
 def parse_graftpoints(
     graftpoints: Iterable[bytes],
 ) -> Dict[bytes, List[bytes]]:
     """Convert a list of graftpoints into a dict
 
     Args:
       graftpoints: Iterator of graftpoint lines
 
     Each line is formatted as:
         <commit sha1> <parent sha1> [<parent sha1>]*
 
     Blank lines and lines starting with ``#`` are skipped.
 
     Resulting dictionary is:
         <commit sha1>: [<parent sha1>*]
 
     Raises whatever check_hexsha() raises for a malformed sha.
 
     https://git.wiki.kernel.org/index.php/GraftPoint
     """
     grafts = {}
     for line in graftpoints:
         fields = line.split()
         # An empty or comment line carries no graft; indexing into it
         # would otherwise fail before any validation could run.
         if not fields or fields[0].startswith(b"#"):
             continue
 
         for sha in fields:
             check_hexsha(sha, "Invalid graftpoint")
 
         # First field is the commit, the rest are its replacement parents.
         grafts[fields[0]] = fields[1:]
     return grafts
 
 
 def serialize_graftpoints(graftpoints: Dict[bytes, List[bytes]]) -> bytes:
     """Render a graft dictionary as newline-separated graft lines.
 
     The graft dictionary is:
         <commit sha1>: [<parent sha1>*]
 
     Each line is formatted as:
         <commit sha1> <parent sha1> [<parent sha1>]*
 
     Lines are joined with a newline; no trailing newline is added.
 
     https://git.wiki.kernel.org/index.php/GraftPoint
 
     """
     rendered = (
         commit + b" " + b" ".join(parents) if parents else commit
         for commit, parents in graftpoints.items()
     )
     return b"\n".join(rendered)
 
 
 def _set_filesystem_hidden(path):
     """Mark path as to be hidden if supported by platform and filesystem.
 
     On win32 uses SetFileAttributesW api:
     <https://docs.microsoft.com/windows/desktop/api/fileapi/nf-fileapi-setfileattributesw>
 
     On every other platform this does nothing.
     """
     if sys.platform != "win32":
         # Could implement other platform specific filesystem hiding here
         return
 
     import ctypes
     from ctypes.wintypes import BOOL, DWORD, LPCWSTR
 
     FILE_ATTRIBUTE_HIDDEN = 2
     prototype = ctypes.WINFUNCTYPE(BOOL, LPCWSTR, DWORD)
     set_attributes = prototype(("SetFileAttributesW", ctypes.windll.kernel32))
 
     wide_path = os.fsdecode(path) if isinstance(path, bytes) else path
     # Best effort: a failed call is ignored on purpose.
     # Could raise or log `ctypes.WinError()` here
     set_attributes(wide_path, FILE_ATTRIBUTE_HIDDEN)
 
 
 class ParentsProvider(object):
     """Resolve commit parents, honouring grafts and shallow commits.
 
     Args:
       store: Mapping-like object store, used to load a commit when none is
         passed to get_parents()
       grafts: Optional dict of commit id -> replacement parent list; the
         dict is kept by reference, not copied
       shallows: Optional iterable of commit ids reported as parentless
     """
 
     def __init__(self, store, grafts=None, shallows=None):
         self.store = store
         # Fresh containers per instance: a literal {} / [] default would be
         # one object shared by every provider created without arguments.
         self.grafts = {} if grafts is None else grafts
         self.shallows = set() if shallows is None else set(shallows)
 
     def get_parents(self, commit_id, commit=None):
         """Return the parents of a commit.
 
         Args:
           commit_id: Id of the commit to look up
           commit: Optional commit object matching commit_id; loaded from
             the store when omitted and needed
         Returns: The grafted parents if commit_id is grafted, an empty
           list if it is shallow, otherwise ``commit.parents``
         """
         try:
             return self.grafts[commit_id]
         except KeyError:
             pass
         if commit_id in self.shallows:
             return []
         if commit is None:
             commit = self.store[commit_id]
         return commit.parents
 
 
 class BaseRepo(object):
     """Base class for a git repository.
 
     :ivar object_store: Dictionary-like object for accessing
         the objects
     :ivar refs: Dictionary-like object with the refs in this
         repository
     """
 
     def __init__(self, object_store: BaseObjectStore, refs: RefsContainer):
         """Open a repository.
 
         This shouldn't be called directly, but rather through one of the
         concrete classes, such as MemoryRepo or Repo.
 
         Args:
           object_store: Object store to use
           refs: Refs container to use
         """
         self.object_store = object_store
         self.refs = refs
 
         # Graft overrides: commit sha -> replacement parent shas.
         self._graftpoints = {}  # type: Dict[bytes, List[bytes]]
         # Hooks keyed by name; do_commit() looks up "pre-commit",
         # "commit-msg" and "post-commit" here.
         self.hooks = {}  # type: Dict[str, Hook]
 
     def _determine_file_mode(self) -> bool:
         """Probe the file-system to determine whether permissions can be trusted.
 
         Returns: True if permissions can be trusted, False otherwise.
         Raises:
           NotImplementedError: always in this base class; the probe is left
             to concrete repository classes.
         """
         raise NotImplementedError(self._determine_file_mode)
 
     def _init_files(self, bare: bool) -> None:
         """Write the default description, config and info/exclude files.
 
         Args:
           bare: Value stored as ``core.bare`` in the generated config
         """
         from dulwich.config import ConfigFile
 
         self._put_named_file("description", b"Unnamed repository")
 
         config = ConfigFile()
         config.set("core", "repositoryformatversion", "0")
         # Always store a real boolean, whatever the probe returned.
         trust_filemode = True if self._determine_file_mode() else False
         config.set("core", "filemode", trust_filemode)
         config.set("core", "bare", bare)
         config.set("core", "logallrefupdates", True)
 
         serialized = BytesIO()
         config.write_to_file(serialized)
         self._put_named_file("config", serialized.getvalue())
         self._put_named_file(os.path.join("info", "exclude"), b"")
 
     def get_named_file(self, path):
         """Get a file from the control dir with a specific name.
 
         Although the filename should be interpreted as a filename relative to
         the control dir in a disk-based Repo, the object returned need not be
         pointing to a file in that location.
 
         Args:
           path: The path to the file, relative to the control dir.
         Returns: An open file object, or None if the file does not exist.
         Raises:
           NotImplementedError: always in this base class; concrete
             repository classes provide the implementation.
         """
         raise NotImplementedError(self.get_named_file)
 
     def _put_named_file(self, path, contents):
         """Write a file to the control dir with the given name and contents.
 
         Args:
           path: The path to the file, relative to the control dir.
           contents: A string to write to the file.
         Raises:
           NotImplementedError: always in this base class; concrete
             repository classes provide the implementation.
         """
         raise NotImplementedError(self._put_named_file)
 
     def _del_named_file(self, path):
         """Delete a file in the control directory with the given name.
 
         Args:
           path: The path to the file, relative to the control dir.
         Raises:
           NotImplementedError: always in this base class; concrete
             repository classes provide the implementation.
         """
         raise NotImplementedError(self._del_named_file)
 
     def open_index(self):
         """Open the index for this repository.
 
         Raises:
           NoIndexPresent: If no index is present
           NotImplementedError: always in this base class; concrete
             repository classes provide the implementation.
         Returns: The matching `Index`
         """
         raise NotImplementedError(self.open_index)
 
     def fetch(self, target, determine_wants=None, progress=None, depth=None):
         """Fetch objects into another repository.
 
         Args:
           target: The target repository
           determine_wants: Optional function to determine what refs to
             fetch.
           progress: Optional progress function
           depth: Optional shallow fetch depth
         Returns: The local refs
         """
         if determine_wants is None:
             determine_wants = target.object_store.determine_wants_all
         count, pack_data = self.fetch_pack_data(
             determine_wants,
             target.get_graph_walker(),
             progress=progress,
             depth=depth,
         )
         target.object_store.add_pack_data(count, pack_data, progress)
         return self.get_refs()
 
     def fetch_pack_data(
         self,
         determine_wants,
         graph_walker,
         progress,
         get_tagged=None,
         depth=None,
     ):
         """Produce pack data for the objects a set of revisions requires.
 
         Args:
           determine_wants: Function that takes a dictionary with heads
             and returns the list of heads to fetch.
           graph_walker: Object that can iterate over the list of revisions
             to fetch and has an "ack" method that will be called to acknowledge
             that a revision is present.
           progress: Simple progress function that will be called with
             updated progress strings.
           get_tagged: Function that returns a dict of pointed-to sha ->
             tag sha for including tags.
           depth: Shallow fetch depth
         Returns: count and iterator over pack data
         """
         # TODO(jelmer): Fetch pack data directly, don't create objects first.
         return pack_objects_to_data(
             self.fetch_objects(
                 determine_wants, graph_walker, progress, get_tagged, depth=depth
             )
         )
 
     def fetch_objects(
         self,
         determine_wants,
         graph_walker,
         progress,
         get_tagged=None,
         depth=None,
     ):
         """Fetch the missing objects required for a set of revisions.
 
         Args:
           determine_wants: Function that takes a dictionary with heads
             and returns the list of heads to fetch.
           graph_walker: Object that can iterate over the list of revisions
             to fetch and has an "ack" method that will be called to acknowledge
             that a revision is present.
           progress: Simple progress function that will be called with
             updated progress strings.
           get_tagged: Function that returns a dict of pointed-to sha ->
             tag sha for including tags.
           depth: Shallow fetch depth
         Returns: iterator over objects, with __len__ implemented; an empty
           list when nothing is wanted, or None when nothing is wanted and
           the graph walker carries shallow/unshallow information
         Raises:
           NotImplementedError: if depth is anything other than None or 0
           TypeError: if determine_wants() does not return a list
         """
         if depth not in (None, 0):
             raise NotImplementedError("depth not supported yet")
 
         # Build the ref dictionary handed to determine_wants(), skipping
         # refs whose target object is not in the object store.
         refs = {}
         for ref, sha in self.get_refs().items():
             try:
                 obj = self.object_store[sha]
             except KeyError:
                 warnings.warn(
                     "ref %s points at non-present sha %s"
                     % (ref.decode("utf-8", "replace"), sha.decode("ascii")),
                     UserWarning,
                 )
                 continue
             else:
                 # For a tag object, additionally advertise the object it
                 # points at under the suffixed ref name.
                 if isinstance(obj, Tag):
                     refs[ref + ANNOTATED_TAG_SUFFIX] = obj.object[1]
                 refs[ref] = sha
 
         wants = determine_wants(refs)
         if not isinstance(wants, list):
             raise TypeError("determine_wants() did not return a list")
 
         # Not every graph walker has these attributes; default to empty.
         shallows = getattr(graph_walker, "shallow", frozenset())
         unshallows = getattr(graph_walker, "unshallow", frozenset())
 
         if wants == []:
             # TODO(dborowitz): find a way to short-circuit that doesn't change
             # this interface.
 
             if shallows or unshallows:
                 # Do not send a pack in shallow short-circuit path
                 return None
 
             return []
 
         # If the graph walker is set up with an implementation that can
         # ACK/NAK to the wire, it will write data to the client through
         # this call as a side-effect.
         haves = self.object_store.find_common_revisions(graph_walker)
 
         # Deal with shallow requests separately because the haves do
         # not reflect what objects are missing
         if shallows or unshallows:
             # TODO: filter the haves commits from iter_shas. the specific
             # commits aren't missing.
             haves = []
 
         parents_provider = ParentsProvider(self.object_store, shallows=shallows)
 
         def get_parents(commit):
             return parents_provider.get_parents(commit.id, commit)
 
         return self.object_store.iter_shas(
             self.object_store.find_missing_objects(
                 haves,
                 wants,
                 self.get_shallow(),
                 progress,
                 get_tagged,
                 get_parents=get_parents,
             )
         )
 
     def generate_pack_data(self, have, want, progress=None, ofs_delta=None):
         """Generate pack data objects for a set of wants/haves.
 
         Args:
           have: List of SHA1s of objects that should not be sent
           want: List of SHA1s of objects that should be sent
           ofs_delta: Whether OFS deltas can be included
           progress: Optional progress reporting method
         """
         return self.object_store.generate_pack_data(
             have,
             want,
             shallow=self.get_shallow(),
             progress=progress,
             ofs_delta=ofs_delta,
         )
 
     def get_graph_walker(self, heads=None):
         """Build a graph walker over this repository's history.
 
         A graph walker is used by a remote repository (or proxy)
         to find out which objects are present in this repository.
 
         Args:
           heads: Repository heads to use (optional); defaults to the
             targets of the refs under ``refs/heads`` that are present in
             the object store
         Returns: A graph walker object
         """
         if heads is None:
             heads = []
             for sha in self.refs.as_dict(b"refs/heads").values():
                 if sha in self.object_store:
                     heads.append(sha)
         provider = ParentsProvider(self.object_store)
         shallow = self.get_shallow()
         return ObjectStoreGraphWalker(heads, provider.get_parents, shallow=shallow)
 
     def get_refs(self) -> Dict[bytes, bytes]:
         """Get dictionary with all refs.
 
         Delegates to the refs container's ``as_dict()``.
 
         Returns: A ``dict`` mapping ref names to SHA1s
         """
         return self.refs.as_dict()
 
     def head(self) -> bytes:
         """Return the SHA1 pointed at by HEAD.
 
         This is a plain ``self.refs[b"HEAD"]`` lookup, so any error the
         refs container raises for an unresolvable HEAD propagates.
         """
         return self.refs[b"HEAD"]
 
     def _get_object(self, sha, cls):
         assert len(sha) in (20, 40)
         ret = self.get_object(sha)
         if not isinstance(ret, cls):
             if cls is Commit:
                 raise NotCommitError(ret)
             elif cls is Blob:
                 raise NotBlobError(ret)
             elif cls is Tree:
                 raise NotTreeError(ret)
             elif cls is Tag:
                 raise NotTagError(ret)
             else:
                 raise Exception(
                     "Type invalid: %r != %r" % (ret.type_name, cls.type_name)
                 )
         return ret
 
     def get_object(self, sha: bytes) -> ShaFile:
         """Retrieve the object with the specified SHA.
 
         Unlike ``__getitem__``, this only consults the object store and
         never resolves ref names.
 
         Args:
           sha: SHA to retrieve
         Returns: A ShaFile object
         Raises:
           KeyError: when the object can not be found
         """
         return self.object_store[sha]
 
     def parents_provider(self):
         """Create a ParentsProvider for this repository.
 
         The provider is built from the object store, the repository's
         graftpoints and its current set of shallow commits.
 
         Returns: A new `ParentsProvider`
         """
         return ParentsProvider(
             self.object_store,
             grafts=self._graftpoints,
             shallows=self.get_shallow(),
         )
 
     def get_parents(self, sha: bytes, commit: Optional[Commit] = None) -> List[bytes]:
         """Retrieve the parents of a specific commit.
 
         If the specific commit is a graftpoint, the graft parents
         will be returned instead.
 
         Args:
           sha: SHA of the commit for which to retrieve the parents
           commit: Optional commit matching the sha
         Returns: List of parents
         """
         # A fresh provider is built on every call (see parents_provider()).
         return self.parents_provider().get_parents(sha, commit)
 
     def get_config(self):
         """Retrieve the config object.
 
         Returns: `ConfigFile` object for the ``.git/config`` file.
         Raises:
           NotImplementedError: always in this base class; concrete
             repository classes provide the implementation.
         """
         raise NotImplementedError(self.get_config)
 
     def get_description(self):
         """Retrieve the description for this repository.
 
         Returns: String with the description of the repository
             as set by the user.
         Raises:
           NotImplementedError: always in this base class; concrete
             repository classes provide the implementation.
         """
         raise NotImplementedError(self.get_description)
 
     def set_description(self, description):
         """Set the description for this repository.
 
         Args:
           description: Text to set as description for this repository.
         Raises:
           NotImplementedError: always in this base class; concrete
             repository classes provide the implementation.
         """
         raise NotImplementedError(self.set_description)
 
     def get_config_stack(self) -> "StackedConfig":
         """Build the layered configuration for this repository.
 
         The repository's own config comes first, followed by the default
         backends (the global configuration, which usually lives in
         ~/.gitconfig).  The repository's own config is the writable
         backend.
 
         Returns: `Config` instance for this repository
         """
         from dulwich.config import StackedConfig
 
         local_config = self.get_config()
         layers = [local_config] + StackedConfig.default_backends()
         return StackedConfig(layers, writable=local_config)
 
     def get_shallow(self):
         """Get the set of shallow commits.
 
         Returns: Set of shallow commits.
         """
         f = self.get_named_file("shallow")
         if f is None:
             return set()
         with f:
             return {line.strip() for line in f}
 
     def update_shallow(self, new_shallow, new_unshallow):
         """Update the list of shallow objects.
 
         Args:
           new_shallow: Newly shallow objects
           new_unshallow: Newly no longer shallow objects
         """
         shallow = self.get_shallow()
         if new_shallow:
             shallow.update(new_shallow)
         if new_unshallow:
             shallow.difference_update(new_unshallow)
         if shallow:
             self._put_named_file(
                 "shallow", b"".join([sha + b"\n" for sha in shallow])
             )
         else:
             self._del_named_file("shallow")
 
     def get_peeled(self, ref):
         """Get the peeled value of a ref.
 
         Args:
           ref: The refname to peel.
         Returns: The fully-peeled SHA1 of a tag object, after peeling all
             intermediate tags; if the original ref does not point to a tag,
             this will equal the original SHA1.
         """
         cached = self.refs.get_peeled(ref)
         if cached is not None:
             return cached
         return self.object_store.peel_sha(self.refs[ref]).id
 
     def get_walker(self, include=None, *args, **kwargs):
         """Obtain a walker for this repository.
 
         Args:
           include: Iterable of SHAs of commits to include along with their
             ancestors. Defaults to [HEAD]. A single SHA (bytes or str) is
             accepted as well and treated as a one-element list.
           exclude: Iterable of SHAs of commits to exclude along with their
             ancestors, overriding includes.
           order: ORDER_* constant specifying the order of results.
             Anything other than ORDER_DATE may result in O(n) memory usage.
           reverse: If True, reverse the order of output, requiring O(n)
             memory.
           max_entries: The maximum number of entries to yield, or None for
             no limit.
           paths: Iterable of file or subtree paths to show entries for.
           rename_detector: diff.RenameDetector object for detecting
             renames.
           follow: If True, follow path across renames/copies. Forces a
             default rename_detector.
           since: Timestamp to list commits after.
           until: Timestamp to list commits before.
           queue_cls: A class to use for a queue of commits, supporting the
             iterator protocol. The constructor takes a single argument, the
             Walker.
         Returns: A `Walker` object
         """
         from dulwich.walk import Walker
 
         if include is None:
             include = [self.head()]
         elif isinstance(include, (bytes, str)):
             # SHAs are bytestrings; a lone one must be wrapped, otherwise
             # it would be iterated element by element.
             include = [include]
 
         # Parents always come from this repository so that grafts and
         # shallow commits are honoured; a caller-supplied value is replaced.
         kwargs["get_parents"] = lambda commit: self.get_parents(commit.id, commit)
 
         return Walker(self.object_store, include, *args, **kwargs)
 
     def __getitem__(self, name):
         """Retrieve a Git object by SHA1 or ref.
 
         Args:
           name: A Git object SHA1 or a ref name
         Returns: A `ShaFile` object, such as a Commit or Blob
         Raises:
           KeyError: when the specified ref or object does not exist
         """
         if not isinstance(name, bytes):
             raise TypeError(
                 "'name' must be bytestring, not %.80s" % type(name).__name__
             )
         if len(name) in (20, 40):
             try:
                 return self.object_store[name]
             except (KeyError, ValueError):
                 pass
         try:
             return self.object_store[self.refs[name]]
         except RefFormatError:
             raise KeyError(name)
 
     def __contains__(self, name: bytes) -> bool:
         """Check if a specific Git object or ref is present.
 
         Args:
           name: Git object SHA1 or ref name
         """
         if len(name) == 20 or (len(name) == 40 and valid_hexsha(name)):
             return name in self.object_store or name in self.refs
         else:
             return name in self.refs
 
     def __setitem__(self, name: bytes, value: Union[ShaFile, bytes]):
         """Point a ref at an object.
 
         Args:
           name: ref name; must be ``HEAD`` or start with ``refs/``
           value: Ref value - either a ShaFile object, or a hex sha
         Raises:
           ValueError: when name is neither ``HEAD`` nor under ``refs/``
           TypeError: when value is neither a ShaFile nor bytes
         """
         if not (name.startswith(b"refs/") or name == b"HEAD"):
             raise ValueError(name)
         if isinstance(value, ShaFile):
             self.refs[name] = value.id
         elif isinstance(value, bytes):
             self.refs[name] = value
         else:
             raise TypeError(value)
 
     def __delitem__(self, name: bytes):
         """Remove a ref.
 
         Args:
           name: Name of the ref to remove
         """
         if name.startswith(b"refs/") or name == b"HEAD":
             del self.refs[name]
         else:
             raise ValueError(name)
 
     def _get_user_identity(
         self, config: "StackedConfig", kind: Optional[str] = None
     ) -> bytes:
         """Determine the identity to use for new commits.
 
         Thin wrapper around the module-level get_user_identity().
 
         Args:
           config: Configuration to read ``user.name``/``user.email`` from
           kind: Optional kind to return identity for, usually either
             "AUTHOR" or "COMMITTER"
         Returns: A user identity
         """
         # TODO(jelmer): Deprecate this function in favor of get_user_identity
         # Forward kind as well: it used to be accepted but silently ignored.
         return get_user_identity(config, kind=kind)
 
     def _add_graftpoints(self, updated_graftpoints: Dict[bytes, List[bytes]]):
         """Add or modify graftpoints
 
         Args:
           updated_graftpoints: Dict of commit shas to list of parent shas
         """
 
         # Simple validation
         for commit, parents in updated_graftpoints.items():
             for sha in [commit] + parents:
                 check_hexsha(sha, "Invalid graftpoint")
 
         self._graftpoints.update(updated_graftpoints)
 
     def _remove_graftpoints(self, to_remove: List[bytes] = []) -> None:
         """Remove graftpoints
 
         Args:
           to_remove: List of commit shas
         """
         for sha in to_remove:
             del self._graftpoints[sha]
 
     def _read_heads(self, name):
         f = self.get_named_file(name)
         if f is None:
             return []
         with f:
             return [line.strip() for line in f.readlines() if line.strip()]
 
     def do_commit(  # noqa: C901
         self,
         message=None,
         committer=None,
         author=None,
         commit_timestamp=None,
         commit_timezone=None,
         author_timestamp=None,
         author_timezone=None,
         tree=None,
         encoding=None,
         ref=b"HEAD",
         merge_heads=None,
         no_verify=False,
     ):
         """Create a new commit.
 
         If not specified, `committer` and `author` default to
         get_user_identity(..., 'COMMITTER')
         and get_user_identity(..., 'AUTHOR') respectively.
 
         Args:
           message: Commit message
           committer: Committer fullname
           author: Author fullname
           commit_timestamp: Commit timestamp (defaults to now)
           commit_timezone: Commit timestamp timezone (defaults to GMT)
           author_timestamp: Author timestamp (defaults to commit
             timestamp)
           author_timezone: Author timestamp timezone
             (defaults to commit timestamp timezone)
           tree: SHA1 of the tree root to use (if not specified the
             current index will be committed).
           encoding: Encoding
           ref: Optional ref to commit to (defaults to current branch);
             None creates a dangling commit without touching any ref
           merge_heads: Merge heads (defaults to .git/MERGE_HEAD)
           no_verify: Skip pre-commit and commit-msg hooks
 
         Returns:
           New commit SHA1
 
         Raises:
           CommitError: when the pre-commit or commit-msg hook fails, or
             when the ref changed while the commit was being created
           ValueError: when tree is not 40 bytes long or no message is given
           InvalidUserIdentity: when committer or author is malformed
         """
 
         try:
             if not no_verify:
                 self.hooks["pre-commit"].execute()
         except HookError as e:
             raise CommitError(e)
         except KeyError:  # no hook defined, silent fallthrough
             pass
 
         c = Commit()
         if tree is None:
             # No explicit tree: commit the current index.
             index = self.open_index()
             c.tree = index.commit(self.object_store)
         else:
             if len(tree) != 40:
                 raise ValueError("tree must be a 40-byte hex sha string")
             c.tree = tree
 
         config = self.get_config_stack()
         if merge_heads is None:
             merge_heads = self._read_heads("MERGE_HEAD")
         if committer is None:
             committer = get_user_identity(config, kind="COMMITTER")
         check_user_identity(committer)
         c.committer = committer
         if commit_timestamp is None:
             # FIXME: Support GIT_COMMITTER_DATE environment variable
             commit_timestamp = time.time()
         c.commit_time = int(commit_timestamp)
         if commit_timezone is None:
             # FIXME: Use current user timezone rather than UTC
             commit_timezone = 0
         c.commit_timezone = commit_timezone
         if author is None:
             author = get_user_identity(config, kind="AUTHOR")
         c.author = author
         check_user_identity(author)
         if author_timestamp is None:
             # FIXME: Support GIT_AUTHOR_DATE environment variable
             author_timestamp = commit_timestamp
         c.author_time = int(author_timestamp)
         if author_timezone is None:
             author_timezone = commit_timezone
         c.author_timezone = author_timezone
         if encoding is None:
             try:
                 encoding = config.get(("i18n",), "commitEncoding")
             except KeyError:
                 pass  # No dice
         if encoding is not None:
             c.encoding = encoding
         if message is None:
             # FIXME: Try to read commit message from .git/MERGE_MSG
             raise ValueError("No commit message specified")
 
         try:
             if no_verify:
                 c.message = message
             else:
                 # The hook may return a rewritten message; None keeps the
                 # original one.
                 c.message = self.hooks["commit-msg"].execute(message)
                 if c.message is None:
                     c.message = message
         except HookError as e:
             raise CommitError(e)
         except KeyError:  # no hook defined, message not modified
             c.message = message
 
         if ref is None:
             # Create a dangling commit
             c.parents = merge_heads
             self.object_store.add_object(c)
         else:
             try:
                 # Ref exists: the new commit's first parent is its current
                 # target, and the ref only moves if it still points there.
                 old_head = self.refs[ref]
                 c.parents = [old_head] + merge_heads
                 self.object_store.add_object(c)
                 ok = self.refs.set_if_equals(
                     ref,
                     old_head,
                     c.id,
                     message=b"commit: " + message,
                     committer=committer,
                     timestamp=commit_timestamp,
                     timezone=commit_timezone,
                 )
             except KeyError:
                 # Ref lookup failed: treat this as the first commit on the
                 # ref and create the ref only if it is still absent.
                 c.parents = merge_heads
                 self.object_store.add_object(c)
                 ok = self.refs.add_if_new(
                     ref,
                     c.id,
                     message=b"commit: " + message,
                     committer=committer,
                     timestamp=commit_timestamp,
                     timezone=commit_timezone,
                 )
             if not ok:
                 # Fail if the atomic compare-and-swap failed, leaving the
                 # commit and all its objects as garbage.
                 raise CommitError("%s changed during commit" % (ref,))
 
         self._del_named_file("MERGE_HEAD")
 
         try:
             self.hooks["post-commit"].execute()
         except HookError as e:  # silent failure
             warnings.warn("post-commit hook failed: %s" % e, UserWarning)
         except KeyError:  # no hook defined, silent fallthrough
             pass
 
         return c.id
 
 
 def read_gitfile(f):
     """Extract the path stored in a ``.git`` file.
 
     The whole content of ``f`` is read; it must begin with the literal
     marker "gitdir: ". Everything after the marker, with trailing
     newlines removed, is the path.
 
     Args:
       f: File-like object whose ``read()`` returns text
     Returns: The path that follows the "gitdir: " marker
     Raises:
       ValueError: If the content does not start with "gitdir: "
     """
     marker = "gitdir: "
     contents = f.read()
     if not contents.startswith(marker):
         raise ValueError("Expected file to start with 'gitdir: '")
     # The marker is known to sit at offset 0, so the tail of the partition
     # is everything after it.
     _, _, remainder = contents.partition(marker)
     return remainder.rstrip("\n")
 
 
 class UnsupportedVersion(Exception):
     """Raised for a repository format version that is not supported.
 
     The offending version is kept on the ``version`` attribute.
     """
 
     def __init__(self, version):
         self.version = version
 
 
 class Repo(BaseRepo):
     """A git repository backed by local disk.
 
     To open an existing repository, call the constructor with
     the path of the repository.
 
     To create a new repository, use the Repo.init class method.
     """
 
     def __init__(self, root, object_store=None, bare=None):
         """Open an existing repository on disk.
 
         Args:
           root: Path of the repository
           object_store: Optional object store to use; when None, a
             `DiskObjectStore` is created from the repository config
           bare: Whether the repository is bare; when None this is
             detected from the directory layout under ``root``
         Raises:
           NotGitRepository: If ``bare`` is None and no repository layout
             is found at ``root``
           UnsupportedVersion: If ``core.repositoryformatversion`` is not 0
         """
         hidden_path = os.path.join(root, CONTROLDIR)
         if bare is None:
             # Non-bare: the control entry is either a file (gitfile) or a
             # directory that contains the object directory.
             if (os.path.isfile(hidden_path) or
                     os.path.isdir(os.path.join(hidden_path, OBJECTDIR))):
                 bare = False
             # Bare: objects and refs directories live directly in root.
             elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
                     os.path.isdir(os.path.join(root, REFSDIR))):
                 bare = True
             else:
                 raise NotGitRepository(
                     "No git repository was found at %(path)s" % dict(path=root)
                 )
 
         self.bare = bare
         if bare is False:
             if os.path.isfile(hidden_path):
                 # The control entry is a gitfile pointing at the real
                 # control directory (resolved relative to root).
                 with open(hidden_path, "r") as f:
                     path = read_gitfile(f)
                 self._controldir = os.path.join(root, path)
             else:
                 self._controldir = hidden_path
         else:
             self._controldir = root
         # An optional COMMONDIR file in the control dir names the common
         # directory, relative to the control dir.
         commondir = self.get_named_file(COMMONDIR)
         if commondir is not None:
             with commondir:
                 self._commondir = os.path.join(
                     self.controldir(),
                     os.fsdecode(commondir.read().rstrip(b"\r\n")),
                 )
         else:
             self._commondir = self._controldir
         self.path = root
         config = self.get_config()
         try:
             format_version = int(config.get("core", "repositoryformatversion"))
         except KeyError:
             # No version recorded: treated as version 0
             format_version = 0
         if format_version != 0:
             raise UnsupportedVersion(format_version)
         if object_store is None:
             object_store = DiskObjectStore.from_config(
                 os.path.join(self.commondir(), OBJECTDIR), config
             )
         refs = DiskRefsContainer(
             self.commondir(), self._controldir, logger=self._write_reflog
         )
         BaseRepo.__init__(self, object_store, refs)
 
         # Graft points are collected from both info/grafts and the shallow
         # file in the common directory, when those files exist.
         self._graftpoints = {}
         graft_file = self.get_named_file(
             os.path.join("info", "grafts"), basedir=self.commondir()
         )
         if graft_file:
             with graft_file:
                 self._graftpoints.update(parse_graftpoints(graft_file))
         graft_file = self.get_named_file("shallow", basedir=self.commondir())
         if graft_file:
             with graft_file:
                 self._graftpoints.update(parse_graftpoints(graft_file))
 
         # Register the shell hooks for this repository.
         self.hooks["pre-commit"] = PreCommitShellHook(self.path, self.controldir())
         self.hooks["commit-msg"] = CommitMsgShellHook(self.controldir())
         self.hooks["post-commit"] = PostCommitShellHook(self.controldir())
         self.hooks["post-receive"] = PostReceiveShellHook(self.controldir())
 
     def _write_reflog(
         self, ref, old_sha, new_sha, committer, timestamp, timezone, message
     ):
         """Append one entry to the reflog file of ``ref``.
 
         The log lives under ``logs/<ref>`` in the control directory; missing
         parent directories are created.
 
         Args:
           ref: Name of the ref being logged (bytes)
           old_sha: Previous value of the ref
           new_sha: New value of the ref
           committer: Identity to record; None to look it up from the config
           timestamp: Time to record; None for the current time
           timezone: Timezone offset to record; None for 0
           message: Reflog message
         """
         from .reflog import format_reflog_line
 
         log_path = os.path.join(self.controldir(), "logs", os.fsdecode(ref))
         try:
             os.makedirs(os.path.dirname(log_path))
         except FileExistsError:
             pass  # log directory is already there
         if committer is None:
             committer = self._get_user_identity(self.get_config_stack())
         check_user_identity(committer)
         timestamp = int(time.time()) if timestamp is None else timestamp
         timezone = 0 if timezone is None else timezone  # FIXME
         with open(log_path, "ab") as f:
             entry = format_reflog_line(
                 old_sha, new_sha, committer, timestamp, timezone, message
             )
             f.write(entry + b"\n")
 
     @classmethod
     def discover(cls, start="."):
         """Find the repository containing a directory.
 
         Tries ``start`` (made absolute) and then each of its parent
         directories in turn, returning a Repo object for the first one
         that can be opened as a Git repository.
 
         Args:
           start: The directory to start discovery from (defaults to '.')
         Raises:
           NotGitRepository: If no directory on the way up is a repository
         """
         candidate = os.path.abspath(start)
         while True:
             try:
                 return cls(candidate)
             except NotGitRepository:
                 candidate, tail = os.path.split(candidate)
                 if not tail:
                     # Nothing left to strip: the top has been tried
                     break
         raise NotGitRepository(
             "No git repository was found at %(path)s" % dict(path=start)
         )
 
     def controldir(self):
         """Return the path of the control directory."""
         return self._controldir
 
     def commondir(self):
         """Return the path of the common directory.
 
         For a main working tree, it is identical to controldir().
 
         For a linked working tree, it is the control directory of the
         main working tree."""
 
         return self._commondir
 
     def _determine_file_mode(self):
         """Probe the file-system to determine whether permissions can be trusted.
 
         Returns: True if permissions can be trusted, False otherwise.
         """
         fname = os.path.join(self.path, ".probe-permissions")
         with open(fname, "w") as f:
             f.write("")
 
         st1 = os.lstat(fname)
         try:
             os.chmod(fname, st1.st_mode ^ stat.S_IXUSR)
         except PermissionError:
             return False
         st2 = os.lstat(fname)
 
         os.unlink(fname)
 
         mode_differs = st1.st_mode != st2.st_mode
         st2_has_exec = (st2.st_mode & stat.S_IXUSR) != 0
 
         return mode_differs and st2_has_exec
 
     def _put_named_file(self, path, contents):
         """Store ``contents`` in a file inside the control dir.
 
         Leading path separators are stripped from ``path`` so the file is
         always placed below the control dir.
 
         Args:
           path: The path to the file, relative to the control dir.
           contents: The data to write to the file.
         """
         relative = path.lstrip(os.path.sep)
         target = os.path.join(self.controldir(), relative)
         with GitFile(target, "wb") as f:
             f.write(contents)
 
     def _del_named_file(self, path):
         try:
             os.unlink(os.path.join(self.controldir(), path))
         except FileNotFoundError:
             return
 
     def get_named_file(self, path, basedir=None):
         """Get a file from the control dir with a specific name.
 
         Although the filename should be interpreted as a filename relative to
         the control dir in a disk-based Repo, the object returned need not be
         pointing to a file in that location.
 
         Args:
           path: The path to the file, relative to the control dir.
           basedir: Optional argument that specifies an alternative to the
             control dir.
         Returns: An open file object, or None if the file does not exist.
         """
         # TODO(dborowitz): sanitize filenames, since this is used directly by
         # the dumb web serving code.
         if basedir is None:
             basedir = self.controldir()
         path = path.lstrip(os.path.sep)
         try:
             return open(os.path.join(basedir, path), "rb")
         except FileNotFoundError:
             return None
 
     def index_path(self):
         """Return the path of the index file inside the control dir."""
         control_path = self.controldir()
         return os.path.join(control_path, INDEX_FILENAME)
 
     def open_index(self) -> "Index":
         """Open the index of this repository.
 
         Raises:
           NoIndexPresent: If this repository has no index
         Returns: The `Index` read from `index_path()`
         """
         from dulwich.index import Index
 
         if self.has_index():
             return Index(self.index_path())
         raise NoIndexPresent()
 
     def has_index(self):
         """Check if an index is present."""
         # Bare repos must never have index files; non-bare repos may have a
         # missing index file, which is treated as empty.
         return not self.bare
 
     def stage(self, fs_paths: Union[str, bytes, os.PathLike, Iterable[Union[str, bytes, os.PathLike]]]) -> None:
         """Stage a set of paths.
 
         Regular files and symlinks are stored as blobs and recorded in the
         index. Paths that no longer exist, or that are neither a regular
         file nor a symlink, are removed from the index instead.
 
         Args:
           fs_paths: A single path or a list of paths, relative to the
             repository path
         Raises:
           ValueError: If one of the paths is absolute
         """
         from dulwich.index import (
             blob_from_path_and_stat,
             index_entry_from_stat,
             _fs_to_tree_path,
         )
 
         root_path_bytes = os.fsencode(self.path)
         if isinstance(fs_paths, (str, bytes, os.PathLike)):
             fs_paths = [fs_paths]
         fs_paths = list(fs_paths)
 
         index = self.open_index()
         blob_normalizer = self.get_blob_normalizer()
         for fs_path in fs_paths:
             if not isinstance(fs_path, bytes):
                 fs_path = os.fsencode(fs_path)
             if os.path.isabs(fs_path):
                 raise ValueError(
                     "path %r should be relative to "
                     "repository root, not absolute" % fs_path
                 )
             tree_path = _fs_to_tree_path(fs_path)
             full_path = os.path.join(root_path_bytes, fs_path)
             try:
                 st = os.lstat(full_path)
             except OSError:
                 st = None  # File no longer exists
             stageable = st is not None and (
                 stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode)
             )
             if stageable:
                 blob = blob_from_path_and_stat(full_path, st)
                 blob = blob_normalizer.checkin_normalize(blob, fs_path)
                 self.object_store.add_object(blob)
                 index[tree_path] = index_entry_from_stat(st, blob.id, 0)
             else:
                 try:
                     del index[tree_path]
                 except KeyError:
                     pass  # already removed
         index.write()
 
     def unstage(self, fs_paths: List[str]):
         """Unstage specific files in the index.
 
         A path found in the HEAD tree gets its index entry rebuilt from
         that tree entry. A path not found there is removed from the index.
 
         Args:
           fs_paths: a list of files to unstage,
             relative to the repository path
         Raises:
           KeyError: If a path is in neither the HEAD tree nor the index
         """
         from dulwich.index import (
             IndexEntry,
             _fs_to_tree_path,
             )
 
         index = self.open_index()
         try:
             head_commit = self[b'HEAD']
             tree_id = head_commit.tree
         except KeyError:
             # no head mean no commit in the repo
             for fs_path in fs_paths:
                 tree_path = _fs_to_tree_path(fs_path)
                 del index[tree_path]
             index.write()
             return
 
         # HEAD was resolved once above; reuse it instead of looking it up
         # again for every path.
         commit_time = head_commit.commit_time
         for fs_path in fs_paths:
             tree_path = _fs_to_tree_path(fs_path)
             try:
                 tree_entry = self.object_store[tree_id].lookup_path(
                     self.object_store.__getitem__, tree_path)
             except KeyError:
                 # if tree_entry didnt exist, this file was being added, so
                 # remove index entry
                 try:
                     del index[tree_path]
                     continue
                 except KeyError:
                     raise KeyError("file '%s' not in index" % (tree_path.decode()))
 
             # Stat data is taken from the working tree file when it exists
             # and left as zeros otherwise.
             st = None
             try:
                 st = os.lstat(os.path.join(self.path, fs_path))
             except FileNotFoundError:
                 pass
 
             index_entry = IndexEntry(
                 ctime=(commit_time, 0),
                 mtime=(commit_time, 0),
                 dev=st.st_dev if st else 0,
                 ino=st.st_ino if st else 0,
                 mode=tree_entry[0],
                 uid=st.st_uid if st else 0,
                 gid=st.st_gid if st else 0,
                 size=len(self[tree_entry[1]].data),
                 sha=tree_entry[1],
                 flags=0,
                 extended_flags=0
             )
 
             index[tree_path] = index_entry
         index.write()
 
     def clone(
         self,
         target_path,
         mkdir=True,
         bare=False,
         origin=b"origin",
         checkout=None,
         branch=None,
     ):
         """Clone this repository.
 
         Args:
           target_path: Target path
           mkdir: Create the target directory
           bare: Whether to create a bare repository
           checkout: Whether or not to check-out HEAD after cloning
           origin: Base name for refs in target repository
             cloned from this repository
           branch: Optional branch or tag to be used as HEAD in the new repository
             instead of this repository's HEAD.
         Returns: Created repository as `Repo`
         """
 
         def clone_refs(target_repo, ref_message):
             # Copy objects into the target, then mirror this repository's
             # branches under refs/remotes/<origin> and its tags under
             # refs/tags.
             self.fetch(target_repo)
             target_repo.refs.import_refs(
                 b"refs/remotes/" + origin,
                 self.refs.as_dict(b"refs/heads"),
                 message=ref_message,
             )
             target_repo.refs.import_refs(
                 b"refs/tags", self.refs.as_dict(b"refs/tags"), message=ref_message
             )
 
             # Report the last entry of the chain followed from HEAD (None
             # when the chain is empty) together with the SHA it resolves to.
             head_chain, sha = self.refs.follow(b"HEAD")
             head_chain = head_chain[-1] if head_chain else None
             return head_chain, sha
 
         # The source path is handed on as bytes.
         encoded_path = self.path
         if not isinstance(encoded_path, bytes):
             encoded_path = os.fsencode(encoded_path)
 
-        return self.do_clone(
+        return do_clone(
             encoded_path,
             target_path,
             clone_refs=clone_refs,
             mkdir=mkdir,
             bare=bare,
             origin=origin,
             checkout=checkout,
             branch=branch,
         )
 
-    @classmethod
-    def do_clone(
-        cls,
-        source_path,
-        target_path,
-        clone_refs=None,
-        mkdir=True,
-        bare=False,
-        origin=b"origin",
-        checkout=None,
-        errstream=None,
-        branch=None,
-    ):
-        if not clone_refs:
-            raise ValueError("clone_refs callback is required")
-
-        if not bare:
-            target = cls.init(target_path, mkdir=mkdir)
-            if checkout is None:
-                checkout = True
-        else:
-            if checkout:
-                raise ValueError("checkout and bare are incompatible")
-            target = cls.init_bare(target_path, mkdir=mkdir)
-
-        try:
-            target_config = target.get_config()
-            target_config.set((b"remote", origin), b"url", source_path)
-            target_config.set(
-                (b"remote", origin),
-                b"fetch",
-                b"+refs/heads/*:refs/remotes/" + origin + b"/*",
-            )
-            target_config.write_to_path()
-
-            ref_message = b"clone: from " + source_path
-            origin_head, origin_sha = clone_refs(target, ref_message)
-            if origin_sha and not origin_head:
-                # set detached HEAD
-                target.refs[b"HEAD"] = origin_sha
-
-            cls._clone_set_origin_head(target, origin, origin_head)
-            head_ref = cls._clone_set_default_branch(
-                target, origin, origin_head, branch, ref_message
-            )
-
-            # Update target head
-            if head_ref:
-                head = cls._clone_set_head(target, head_ref, ref_message)
-            else:
-                head = None
-
-            if checkout and head is not None:
-                if errstream:
-                    errstream.write(b"Checking out " + head + b"\n")
-                target.reset_index()
-        except BaseException:
-            target.close()
-            raise
-
-        return target
-
-    @staticmethod
-    def _clone_set_origin_head(r, origin, origin_head):
-        # set refs/remotes/origin/HEAD
-        origin_base = b"refs/remotes/" + origin + b"/"
-        if origin_head and origin_head.startswith(LOCAL_BRANCH_PREFIX):
-            origin_ref = origin_base + b"HEAD"
-            target_ref = origin_base + origin_head[len(LOCAL_BRANCH_PREFIX) :]
-            if target_ref in r.refs:
-                r.refs.set_symbolic_ref(origin_ref, target_ref)
-
-    @staticmethod
-    def _clone_set_default_branch(r, origin, origin_head, branch, ref_message):
-        origin_base = b"refs/remotes/" + origin + b"/"
-        if branch:
-            origin_ref = origin_base + branch
-            if origin_ref in r.refs:
-                local_ref = LOCAL_BRANCH_PREFIX + branch
-                r.refs.add_if_new(
-                    local_ref, r.refs[origin_ref], ref_message
-                )
-                head_ref = local_ref
-            elif LOCAL_TAG_PREFIX + branch in r.refs:
-                head_ref = LOCAL_TAG_PREFIX + branch
-            else:
-                raise ValueError(
-                    "%s is not a valid branch or tag" % os.fsencode(branch)
-                )
-        elif origin_head:
-            head_ref = origin_head
-            if origin_head.startswith(LOCAL_BRANCH_PREFIX):
-                origin_ref = origin_base + origin_head[len(LOCAL_BRANCH_PREFIX) :]
-            else:
-                origin_ref = origin_head
-            try:
-                r.refs.add_if_new(
-                    head_ref, r.refs[origin_ref], ref_message
-                )
-            except KeyError:
-                pass
-        return head_ref
-
-    @staticmethod
-    def _clone_set_head(r, head_ref, ref_message):
-        if head_ref.startswith(LOCAL_TAG_PREFIX):
-            # detach HEAD at specified tag
-            head = r.refs[head_ref]
-            if isinstance(head, Tag):
-                _cls, obj = head.object
-                head = obj.get_object(obj).id
-            del r.refs[b"HEAD"]
-            r.refs.set_if_equals(
-                b"HEAD", None, head, message=ref_message
-            )
-        else:
-            # set HEAD to specific branch
-            try:
-                head = r.refs[head_ref]
-                r.refs.set_symbolic_ref(b"HEAD", head_ref)
-                r.refs.set_if_equals(
-                    b"HEAD", None, head, message=ref_message
-                )
-            except KeyError:
-                head = None
-        return head
-
     def reset_index(self, tree=None):
         """Reset the index back to a specific tree.
 
         Args:
           tree: Tree SHA to reset to, None for current HEAD tree.
         Returns: The result of `build_index_from_tree`
         """
         from dulwich.index import (
             build_index_from_tree,
             validate_path_element_default,
             validate_path_element_ntfs,
         )
 
         if tree is None:
             head = self[b"HEAD"]
             if isinstance(head, Tag):
                 # HEAD names a tag object: use the object it points at
                 _cls, obj = head.object
                 head = self.get_object(obj)
             tree = head.tree
         config = self.get_config()
         honor_filemode = config.get_boolean(b"core", b"filemode", os.name != "nt")
         # The setting is named "protectNTFS" inside the "core" section; the
         # section name must not be repeated in the key or it never matches.
         if config.get_boolean(b"core", b"protectNTFS", os.name == "nt"):
             validate_path_element = validate_path_element_ntfs
         else:
             validate_path_element = validate_path_element_default
         return build_index_from_tree(
             self.path,
             self.index_path(),
             self.object_store,
             tree,
             honor_filemode=honor_filemode,
             validate_path_element=validate_path_element,
         )
 
     def get_config(self) -> "ConfigFile":
         """Load the repository configuration.
 
         Returns: `ConfigFile` object for the ``config`` file in the control
           directory. When that file does not exist, an empty `ConfigFile`
           whose ``path`` points at it is returned.
         """
         from dulwich.config import ConfigFile
 
         config_path = os.path.join(self._controldir, "config")
         try:
             loaded = ConfigFile.from_path(config_path)
         except FileNotFoundError:
             loaded = ConfigFile()
             loaded.path = config_path
         return loaded
 
     def get_description(self):
         """Read the description of this repository.
 
         Returns: The raw contents of the ``description`` file in the
           control directory, or None if that file does not exist.
         """
         description_path = os.path.join(self._controldir, "description")
         try:
             with GitFile(description_path, "rb") as f:
                 contents = f.read()
         except FileNotFoundError:
             return None
         return contents
 
     def __repr__(self):
         return "<Repo at %r>" % self.path
 
     def set_description(self, description):
         """Set the description for this repository.
 
         Args:
           description: Text to set as description for this repository.
         """
 
         self._put_named_file("description", description)
 
     @classmethod
     def _init_maybe_bare(cls, path, controldir, bare, object_store=None):
         """Create the on-disk layout of a new repository and open it.
 
         Args:
           path: Path of the repository
           controldir: Existing directory in which the base directories
             are created
           bare: Whether the new repository is bare
           object_store: Optional object store; when None a new
             `DiskObjectStore` is initialised inside ``controldir``
         Returns: The new repository, with HEAD set as a symbolic ref to
           DEFAULT_REF
         """
         for parts in BASE_DIRECTORIES:
             os.mkdir(os.path.join(controldir, *parts))
         if object_store is None:
             objects_path = os.path.join(controldir, OBJECTDIR)
             object_store = DiskObjectStore.init(objects_path)
         repo = cls(path, bare=bare, object_store=object_store)
         repo.refs.set_symbolic_ref(b"HEAD", DEFAULT_REF)
         repo._init_files(bare)
         return repo
 
     @classmethod
     def init(cls, path, mkdir=False):
         """Create a new non-bare repository.
 
         The control directory is created below ``path`` and passed to
         `_set_filesystem_hidden` before the repository is initialised.
 
         Args:
           path: Path in which to create the repository
           mkdir: Whether to create the directory
         Returns: `Repo` instance
         """
         if mkdir:
             os.mkdir(path)
         hidden_dir = os.path.join(path, CONTROLDIR)
         os.mkdir(hidden_dir)
         _set_filesystem_hidden(hidden_dir)
         return cls._init_maybe_bare(path, hidden_dir, False)
 
     @classmethod
     def _init_new_working_directory(cls, path, main_repo, identifier=None, mkdir=False):
         """Set up a new working directory linked to an existing repository.
 
         A gitfile is written into ``path`` that points at a per-worktree
         control directory below the main repository's control directory.
         That directory receives the GITDIR, COMMONDIR and HEAD files, after
         which the working directory is opened and its index reset.
 
         Args:
           path: Path in which to create the working tree.
           main_repo: Main repository to reference
           identifier: Worktree identifier; defaults to the basename of
             ``path``
           mkdir: Whether to create the directory
         Returns: `Repo` instance
         """
         if mkdir:
             os.mkdir(path)
         if identifier is None:
             identifier = os.path.basename(path)
         main_worktreesdir = os.path.join(main_repo.controldir(), WORKTREES)
         worktree_controldir = os.path.join(main_worktreesdir, identifier)
         gitdirfile = os.path.join(path, CONTROLDIR)
         with open(gitdirfile, "wb") as f:
             f.write(b"gitdir: " + os.fsencode(worktree_controldir) + b"\n")
         # Both directories may already be present; that is not an error.
         for directory in (main_worktreesdir, worktree_controldir):
             try:
                 os.mkdir(directory)
             except FileExistsError:
                 pass
         gitdir_record = os.path.join(worktree_controldir, GITDIR)
         with open(gitdir_record, "wb") as f:
             f.write(os.fsencode(gitdirfile) + b"\n")
         commondir_record = os.path.join(worktree_controldir, COMMONDIR)
         with open(commondir_record, "wb") as f:
             f.write(b"../..\n")
         head_record = os.path.join(worktree_controldir, "HEAD")
         with open(head_record, "wb") as f:
             f.write(main_repo.head() + b"\n")
         worktree = cls(path)
         worktree.reset_index()
         return worktree
 
     @classmethod
     def init_bare(cls, path, mkdir=False, object_store=None):
         """Create a new bare repository.
 
         Unless ``mkdir`` is set, ``path`` should already exist and be an
         empty directory.
 
         Args:
           path: Path to create bare repository in
           mkdir: Whether to create the directory first
           object_store: Optional object store passed on to
             `_init_maybe_bare`
         Returns: a `Repo` instance
         """
         if mkdir:
             os.mkdir(path)
         # For a bare repository the control dir is the repository path.
         controldir = path
         return cls._init_maybe_bare(path, controldir, True, object_store=object_store)
 
     # ``create`` is a second name for ``init_bare``.
     create = init_bare
 
     def close(self):
         """Close any files opened by this repository."""
         self.object_store.close()
 
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
 
     def get_blob_normalizer(self):
         """Return a BlobNormalizer object.
 
         A `TreeBlobNormalizer` bound to the tree of the HEAD commit is
         returned when one can be built; if a KeyError is raised while
         doing so, a plain `BlobNormalizer` is returned instead.
         """
         # TODO Parse the git attributes files
         attributes = {}
         stack = self.get_config_stack()
         try:
             head_object = self.object_store[self.refs[b"HEAD"]]
             return TreeBlobNormalizer(
                 stack,
                 attributes,
                 self.object_store,
                 head_object.tree,
             )
         except KeyError:
             return BlobNormalizer(stack, attributes)
 
 
 class MemoryRepo(BaseRepo):
     """Repo that stores refs, objects, and named files in memory.
 
     MemoryRepos are always bare: they have no working tree and no index, since
     those have a stronger dependency on the filesystem.
     """
 
     def __init__(self):
         """Create an empty in-memory repository.
 
         Refs, objects, named files, the reflog and the config all start
         out empty, and the repository is marked as bare.
         """
         from dulwich.config import ConfigFile
 
         self._reflog = []
         BaseRepo.__init__(
             self,
             MemoryObjectStore(),
             DictRefsContainer({}, logger=self._append_reflog),
         )
         self.bare = True
         self._description = None
         self._named_files = {}
         self._config = ConfigFile()
 
     def _append_reflog(self, *args):
         self._reflog.append(args)
 
     def set_description(self, description):
         self._description = description
 
     def get_description(self):
         return self._description
 
     def _determine_file_mode(self):
         """Probe the file-system to determine whether permissions can be trusted.
 
         Returns: True if permissions can be trusted, False otherwise.
         """
         return sys.platform != "win32"
 
     def _put_named_file(self, path, contents):
         """Write a file to the control dir with the given name and contents.
 
         Args:
           path: The path to the file, relative to the control dir.
           contents: A string to write to the file.
         """
         self._named_files[path] = contents
 
     def _del_named_file(self, path):
         try:
             del self._named_files[path]
         except KeyError:
             pass
 
     def get_named_file(self, path, basedir=None):
         """Get a file from the control dir with a specific name.
 
         Although the filename should be interpreted as a filename relative to
         the control dir in a disk-baked Repo, the object returned need not be
         pointing to a file in that location.
 
         Args:
           path: The path to the file, relative to the control dir.
         Returns: An open file object, or None if the file does not exist.
         """
         contents = self._named_files.get(path, None)
         if contents is None:
             return None
         return BytesIO(contents)
 
     def open_index(self):
         """Refuse to open an index, since this repository is bare.
 
         Raises:
           NoIndexPresent: Always
         """
         error = NoIndexPresent()
         raise error
 
     def get_config(self):
         """Retrieve the config object.
 
         Returns: `ConfigFile` object.
         """
         return self._config
 
     @classmethod
     def init_bare(cls, objects, refs):
         """Build a new bare in-memory repository from objects and refs.
 
         Args:
           objects: Objects for the new repository,
             as iterable
           refs: Refs as dictionary, mapping names
             to object SHA1s
         Returns: The new repository
         """
         repo = cls()
         add_object = repo.object_store.add_object
         for obj in objects:
             add_object(obj)
         for name, target in refs.items():
             repo.refs.add_if_new(name, target)
         repo._init_files(bare=True)
         return repo