diff --git a/dulwich/index.py b/dulwich/index.py index 9f40baef..04af3a0c 100644 --- a/dulwich/index.py +++ b/dulwich/index.py @@ -1,848 +1,848 @@ # index.py -- File parser/writer for the git index file # Copyright (C) 2008-2013 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Parser for the git index file format.""" import collections import errno import os import stat import struct import sys from dulwich.file import GitFile from dulwich.objects import ( Blob, S_IFGITLINK, S_ISGITLINK, Tree, hex_to_sha, sha_to_hex, ) from dulwich.pack import ( SHA1Reader, SHA1Writer, ) IndexEntry = collections.namedtuple( 'IndexEntry', [ 'ctime', 'mtime', 'dev', 'ino', 'mode', 'uid', 'gid', 'size', 'sha', 'flags']) FLAG_STAGEMASK = 0x3000 FLAG_VALID = 0x8000 FLAG_EXTENDED = 0x4000 def pathsplit(path): """Split a /-delimited path into a directory part and a basename. Args: path: The path to split. Returns: Tuple with directory name and basename """ try: (dirname, basename) = path.rsplit(b"/", 1) except ValueError: return (b"", path) else: return (dirname, basename) def pathjoin(*args): """Join a /-delimited path. """ return b"/".join([p for p in args if p]) def read_cache_time(f): """Read a cache time. Args: f: File-like object to read from Returns: Tuple with seconds and nanoseconds """ return struct.unpack(">LL", f.read(8)) def write_cache_time(f, t): """Write a cache time. Args: f: File-like object to write to t: Time to write (as int, float or tuple with secs and nsecs) """ if isinstance(t, int): t = (t, 0) elif isinstance(t, float): (secs, nsecs) = divmod(t, 1.0) t = (int(secs), int(nsecs * 1000000000)) elif not isinstance(t, tuple): raise TypeError(t) f.write(struct.pack(">LL", *t)) def read_cache_entry(f): """Read an entry from a cache file. Args: f: File-like object to read from Returns: tuple with: device, inode, mode, uid, gid, size, sha, flags """ beginoffset = f.tell() ctime = read_cache_time(f) mtime = read_cache_time(f) (dev, ino, mode, uid, gid, size, sha, flags, ) = \ struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2)) name = f.read((flags & 0x0fff)) # Padding: real_size = ((f.tell() - beginoffset + 8) & ~7) f.read((beginoffset + real_size) - f.tell()) return (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha_to_hex(sha), flags & ~0x0fff) def write_cache_entry(f, entry): """Write an index entry to a file. Args: f: File object entry: Entry to write, tuple with: (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) """ beginoffset = f.tell() (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry write_cache_time(f, ctime) write_cache_time(f, mtime) flags = len(name) | (flags & ~0x0fff) f.write(struct.pack( b'>LLLLLL20sH', dev & 0xFFFFFFFF, ino & 0xFFFFFFFF, mode, uid, gid, size, hex_to_sha(sha), flags)) f.write(name) real_size = ((f.tell() - beginoffset + 8) & ~7) f.write(b'\0' * ((beginoffset + real_size) - f.tell())) def read_index(f): """Read an index file, yielding the individual entries.""" header = f.read(4) if header != b'DIRC': raise AssertionError("Invalid index file header: %r" % header) (version, num_entries) = struct.unpack(b'>LL', f.read(4 * 2)) assert version in (1, 2) for i in range(num_entries): yield read_cache_entry(f) def read_index_dict(f): """Read an index file and return it as a dictionary. Args: f: File object to read from """ ret = {} for x in read_index(f): ret[x[0]] = IndexEntry(*x[1:]) return ret def write_index(f, entries): """Write an index file. Args: f: File-like object to write to entries: Iterable over the entries to write """ f.write(b'DIRC') f.write(struct.pack(b'>LL', 2, len(entries))) for x in entries: write_cache_entry(f, x) def write_index_dict(f, entries): """Write an index file based on the contents of a dictionary. """ entries_list = [] for name in sorted(entries): entries_list.append((name,) + tuple(entries[name])) write_index(f, entries_list) def cleanup_mode(mode): """Cleanup a mode value. This will return a mode that can be stored in a tree object. Args: mode: Mode to clean up. """ if stat.S_ISLNK(mode): return stat.S_IFLNK elif stat.S_ISDIR(mode): return stat.S_IFDIR elif S_ISGITLINK(mode): return S_IFGITLINK ret = stat.S_IFREG | 0o644 if mode & 0o100: ret |= 0o111 return ret class Index(object): """A Git Index file.""" def __init__(self, filename): """Open an index file. Args: filename: Path to the index file """ self._filename = filename self.clear() self.read() @property def path(self): return self._filename def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._filename) def write(self): """Write current contents of index to disk.""" f = GitFile(self._filename, 'wb') try: f = SHA1Writer(f) write_index_dict(f, self._byname) finally: f.close() def read(self): """Read current contents of index from disk.""" if not os.path.exists(self._filename): return f = GitFile(self._filename, 'rb') try: f = SHA1Reader(f) for x in read_index(f): self[x[0]] = IndexEntry(*x[1:]) # FIXME: Additional data? f.read(os.path.getsize(self._filename)-f.tell()-20) f.check_sha() finally: f.close() def __len__(self): """Number of entries in this index file.""" return len(self._byname) def __getitem__(self, name): """Retrieve entry by relative path. Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) """ return self._byname[name] def __iter__(self): """Iterate over the paths in this index.""" return iter(self._byname) def get_sha1(self, path): """Return the (git object) SHA1 for the object at a path.""" return self[path].sha def get_mode(self, path): """Return the POSIX file mode for the object at a path.""" return self[path].mode def iterobjects(self): """Iterate over path, sha, mode tuples for use with commit_tree.""" for path in self: entry = self[path] yield path, entry.sha, cleanup_mode(entry.mode) def iterblobs(self): import warnings warnings.warn('Use iterobjects() instead.', PendingDeprecationWarning) return self.iterobjects() def clear(self): """Remove all contents from this index.""" self._byname = {} def __setitem__(self, name, x): assert isinstance(name, bytes) assert len(x) == 10 # Remove the old entry if any self._byname[name] = IndexEntry(*x) def __delitem__(self, name): assert isinstance(name, bytes) del self._byname[name] def iteritems(self): return self._byname.items() def items(self): return self._byname.items() def update(self, entries): for name, value in entries.items(): self[name] = value def changes_from_tree(self, object_store, tree, want_unchanged=False): """Find the differences between the contents of this index and a tree. Args: object_store: Object store to use for retrieving tree contents tree: SHA1 of the root tree want_unchanged: Whether unchanged files should be reported Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) """ def lookup_entry(path): entry = self[path] return entry.sha, cleanup_mode(entry.mode) for (name, mode, sha) in changes_from_tree( self._byname.keys(), lookup_entry, object_store, tree, want_unchanged=want_unchanged): yield (name, mode, sha) def commit(self, object_store): """Create a new tree from an index. Args: object_store: Object store to save the tree in Returns: Root tree SHA """ return commit_tree(object_store, self.iterobjects()) def commit_tree(object_store, blobs): """Commit a new tree. Args: object_store: Object store to add trees to blobs: Iterable over blob path, sha, mode entries Returns: SHA1 of the created tree. """ trees = {b'': {}} def add_tree(path): if path in trees: return trees[path] dirname, basename = pathsplit(path) t = add_tree(dirname) assert isinstance(basename, bytes) newtree = {} t[basename] = newtree trees[path] = newtree return newtree for path, sha, mode in blobs: tree_path, basename = pathsplit(path) tree = add_tree(tree_path) tree[basename] = (mode, sha) def build_tree(path): tree = Tree() for basename, entry in trees[path].items(): if isinstance(entry, dict): mode = stat.S_IFDIR sha = build_tree(pathjoin(path, basename)) else: (mode, sha) = entry tree.add(basename, mode, sha) object_store.add_object(tree) return tree.id return build_tree(b'') def commit_index(object_store, index): """Create a new tree from an index. Args: object_store: Object store to save the tree in index: Index file Note: This function is deprecated, use index.commit() instead. Returns: Root tree sha. """ return commit_tree(object_store, index.iterobjects()) def changes_from_tree(names, lookup_entry, object_store, tree, want_unchanged=False): """Find the differences between the contents of a tree and a working copy. Args: names: Iterable of names in the working copy lookup_entry: Function to lookup an entry in the working copy object_store: Object store to use for retrieving tree contents tree: SHA1 of the root tree, or None for an empty tree want_unchanged: Whether unchanged files should be reported Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) """ # TODO(jelmer): Support a include_trees option other_names = set(names) if tree is not None: for (name, mode, sha) in object_store.iter_tree_contents(tree): try: (other_sha, other_mode) = lookup_entry(name) except KeyError: # Was removed yield ((name, None), (mode, None), (sha, None)) else: other_names.remove(name) if (want_unchanged or other_sha != sha or other_mode != mode): yield ((name, name), (mode, other_mode), (sha, other_sha)) # Mention added files for name in other_names: try: (other_sha, other_mode) = lookup_entry(name) except KeyError: pass else: yield ((None, name), (None, other_mode), (None, other_sha)) def index_entry_from_stat(stat_val, hex_sha, flags, mode=None): """Create a new index entry from a stat value. Args: stat_val: POSIX stat_result instance hex_sha: Hex sha of the object flags: Index flags """ if mode is None: mode = cleanup_mode(stat_val.st_mode) return IndexEntry( stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev, stat_val.st_ino, mode, stat_val.st_uid, stat_val.st_gid, stat_val.st_size, hex_sha, flags) def build_file_from_blob(blob, mode, target_path, honor_filemode=True): """Build a file or symlink on disk based on a Git object. Args: obj: The git object mode: File mode target_path: Path to write to honor_filemode: An optional flag to honor core.filemode setting in config file, default is core.filemode=True, change executable bit Returns: stat object for the file """ try: oldstat = os.lstat(target_path) except OSError as e: if e.errno == errno.ENOENT: oldstat = None else: raise contents = blob.as_raw_string() if stat.S_ISLNK(mode): # FIXME: This will fail on Windows. What should we do instead? if oldstat: os.unlink(target_path) if sys.platform == 'win32' and sys.version_info[0] == 3: # os.readlink on Python3 on Windows requires a unicode string. # TODO(jelmer): Don't assume tree_encoding == fs_encoding tree_encoding = sys.getfilesystemencoding() contents = contents.decode(tree_encoding) target_path = target_path.decode(tree_encoding) os.symlink(contents, target_path) else: if oldstat is not None and oldstat.st_size == len(contents): with open(target_path, 'rb') as f: if f.read() == contents: return oldstat with open(target_path, 'wb') as f: # Write out file f.write(contents) if honor_filemode: os.chmod(target_path, mode) return os.lstat(target_path) INVALID_DOTNAMES = (b".git", b".", b"..", b"") def validate_path_element_default(element): return element.lower() not in INVALID_DOTNAMES def validate_path_element_ntfs(element): stripped = element.rstrip(b". ").lower() if stripped in INVALID_DOTNAMES: return False if stripped == b"git~1": return False return True def validate_path(path, element_validator=validate_path_element_default): """Default path validator that just checks for .git/.""" parts = path.split(b"/") for p in parts: if not element_validator(p): return False else: return True def build_index_from_tree(root_path, index_path, object_store, tree_id, honor_filemode=True, validate_path_element=validate_path_element_default): """Generate and materialize index from a tree Args: tree_id: Tree to materialize root_path: Target dir for materialized index files index_path: Target path for generated index object_store: Non-empty object store holding tree contents honor_filemode: An optional flag to honor core.filemode setting in config file, default is core.filemode=True, change executable bit validate_path_element: Function to validate path elements to check out; default just refuses .git and .. directories. Note: existing index is wiped and contents are not merged in a working dir. Suitable only for fresh clones. """ index = Index(index_path) if not isinstance(root_path, bytes): root_path = root_path.encode(sys.getfilesystemencoding()) for entry in object_store.iter_tree_contents(tree_id): if not validate_path(entry.path, validate_path_element): continue full_path = _tree_to_fs_path(root_path, entry.path) if not os.path.exists(os.path.dirname(full_path)): os.makedirs(os.path.dirname(full_path)) # TODO(jelmer): Merge new index into working tree if S_ISGITLINK(entry.mode): if not os.path.isdir(full_path): os.mkdir(full_path) st = os.lstat(full_path) # TODO(jelmer): record and return submodule paths else: obj = object_store[entry.sha] st = build_file_from_blob( obj, entry.mode, full_path, honor_filemode=honor_filemode) # Add file to index if not honor_filemode or S_ISGITLINK(entry.mode): # we can not use tuple slicing to build a new tuple, # because on windows that will convert the times to # longs, which causes errors further along st_tuple = (entry.mode, st.st_ino, st.st_dev, st.st_nlink, st.st_uid, st.st_gid, st.st_size, st.st_atime, st.st_mtime, st.st_ctime) st = st.__class__(st_tuple) index[entry.path] = index_entry_from_stat(st, entry.sha, 0) index.write() def blob_from_path_and_stat(fs_path, st): """Create a blob from a path and a stat object. Args: fs_path: Full file system path to file st: A stat object Returns: A `Blob` object """ assert isinstance(fs_path, bytes) blob = Blob() if stat.S_ISLNK(st.st_mode): if sys.platform == 'win32': # os.readlink on Python3 on Windows requires a unicode string. # TODO(jelmer): Don't assume tree_encoding == fs_encoding tree_encoding = sys.getfilesystemencoding() fs_path = fs_path.decode(tree_encoding) blob.data = os.readlink(fs_path).encode(tree_encoding) else: blob.data = os.readlink(fs_path) else: with open(fs_path, 'rb') as f: blob.data = f.read() return blob def read_submodule_head(path): """Read the head commit of a submodule. Args: path: path to the submodule Returns: HEAD sha, None if not a valid head/repository """ from dulwich.errors import NotGitRepository from dulwich.repo import Repo # Repo currently expects a "str", so decode if necessary. # TODO(jelmer): Perhaps move this into Repo() ? if not isinstance(path, str): path = path.decode(sys.getfilesystemencoding()) try: repo = Repo(path) except NotGitRepository: return None try: return repo.head() except KeyError: return None def _has_directory_changed(tree_path, entry): """Check if a directory has changed after getting an error. When handling an error trying to create a blob from a path, call this function. It will check if the path is a directory. If it's a directory and a submodule, check the submodule head to see if it's has changed. If not, consider the file as changed as Git tracked a file and not a directory. Return true if the given path should be considered as changed and False otherwise or if the path is not a directory. """ # This is actually a directory if os.path.exists(os.path.join(tree_path, b'.git')): # Submodule head = read_submodule_head(tree_path) if entry.sha != head: return True else: # The file was changed to a directory, so consider it removed. return True return False def get_unstaged_changes(index, root_path, filter_blob_callback=None): """Walk through an index and check for differences against working tree. Args: index: index to check root_path: path in which to find files Returns: iterator over paths with unstaged changes """ # For each entry in the index check the sha1 & ensure not staged if not isinstance(root_path, bytes): root_path = root_path.encode(sys.getfilesystemencoding()) for tree_path, entry in index.iteritems(): full_path = _tree_to_fs_path(root_path, tree_path) try: st = os.lstat(full_path) if stat.S_ISDIR(st.st_mode): if _has_directory_changed(tree_path, entry): yield tree_path continue blob = blob_from_path_and_stat(full_path, st) if filter_blob_callback is not None: blob = filter_blob_callback(blob, tree_path) except BlockDevice: # The file is now a block device yield tree_path except EnvironmentError as e: if e.errno == errno.ENOENT: # The file was removed, so we assume that counts as # different from whatever file used to exist. yield tree_path else: raise else: if blob.id != entry.sha: yield tree_path os_sep_bytes = os.sep.encode('ascii') def _tree_to_fs_path(root_path, tree_path): """Convert a git tree path to a file system path. Args: root_path: Root filesystem path tree_path: Git tree path as bytes Returns: File system path. """ assert isinstance(tree_path, bytes) if os_sep_bytes != b'/': sep_corrected_path = tree_path.replace(b'/', os_sep_bytes) else: sep_corrected_path = tree_path return os.path.join(root_path, sep_corrected_path) def _fs_to_tree_path(fs_path, fs_encoding=None): """Convert a file system path to a git tree path. Args: fs_path: File system path. fs_encoding: File system encoding Returns: Git tree path as bytes """ if fs_encoding is None: fs_encoding = sys.getfilesystemencoding() if not isinstance(fs_path, bytes): fs_path_bytes = fs_path.encode(fs_encoding) else: fs_path_bytes = fs_path if os_sep_bytes != b'/': tree_path = fs_path_bytes.replace(os_sep_bytes, b'/') else: tree_path = fs_path_bytes return tree_path def index_entry_from_path(path, object_store=None): """Create an index from a filesystem path. This returns an index value for files, symlinks and tree references. for directories and non-existant files it returns None Args: path: Path to create an index entry for object_store: Optional object store to save new blobs in Returns: An index entry; None for directories """ assert isinstance(path, bytes) st = os.lstat(path) if stat.S_ISDIR(st.st_mode): if os.path.exists(os.path.join(path, b'.git')): head = read_submodule_head(path) if head is None: return None return index_entry_from_stat( st, head, 0, mode=S_IFGITLINK) return None - if stat.S_ISBLK(st.st_mode): - return None + if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode): + blob = blob_from_path_and_stat(path, st) + if object_store is not None: + object_store.add_object(blob) + return index_entry_from_stat(st, blob.id, 0) - blob = blob_from_path_and_stat(path, st) - if object_store is not None: - object_store.add_object(blob) - return index_entry_from_stat(st, blob.id, 0) + return None def iter_fresh_entries(paths, root_path, object_store=None): """Iterate over current versions of index entries on disk. Args: paths: Paths to iterate over root_path: Root path to access from store: Optional store to save new blobs in Returns: Iterator over path, index_entry """ for path in paths: p = _tree_to_fs_path(root_path, path) try: entry = index_entry_from_path(p, object_store=object_store) except EnvironmentError as e: if e.errno in (errno.ENOENT, errno.EISDIR): entry = None else: raise yield path, entry def iter_fresh_blobs(index, root_path): """Iterate over versions of blobs on disk referenced by index. Don't use this function; it removes missing entries from index. Args: index: Index file root_path: Root path to access from include_deleted: Include deleted entries with sha and mode set to None Returns: Iterator over path, sha, mode """ import warnings warnings.warn(PendingDeprecationWarning, "Use iter_fresh_objects instead.") for entry in iter_fresh_objects( index, root_path, include_deleted=True): if entry[1] is None: del index[entry[0]] else: yield entry def iter_fresh_objects(paths, root_path, include_deleted=False, object_store=None): """Iterate over versions of objecs on disk referenced by index. Args: index: Index file root_path: Root path to access from include_deleted: Include deleted entries with sha and mode set to None object_store: Optional object store to report new items to Returns: Iterator over path, sha, mode """ for path, entry in iter_fresh_entries(paths, root_path, object_store=object_store): if entry is None: if include_deleted: yield path, None, None else: entry = IndexEntry(*entry) yield path, entry.sha, cleanup_mode(entry.mode) def refresh_index(index, root_path): """Refresh the contents of an index. This is the equivalent to running 'git commit -a'. Args: index: Index to update root_path: Root filesystem path """ for path, entry in iter_fresh_entries(index, root_path): index[path] = path diff --git a/dulwich/repo.py b/dulwich/repo.py index ff8a0267..c0e51000 100644 --- a/dulwich/repo.py +++ b/dulwich/repo.py @@ -1,1484 +1,1485 @@ # repo.py -- For dealing with git repositories. # Copyright (C) 2007 James Westby # Copyright (C) 2008-2013 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Repository access. This module contains the base class for git repositories (BaseRepo) and an implementation which uses a repository on local disk (Repo). """ from io import BytesIO import errno import os import sys import stat import time from dulwich.errors import ( NoIndexPresent, NotBlobError, NotCommitError, NotGitRepository, NotTreeError, NotTagError, CommitError, RefFormatError, HookError, ) from dulwich.file import ( GitFile, ) from dulwich.object_store import ( DiskObjectStore, MemoryObjectStore, ObjectStoreGraphWalker, ) from dulwich.objects import ( check_hexsha, Blob, Commit, ShaFile, Tag, Tree, ) from dulwich.pack import ( pack_objects_to_data, ) from dulwich.hooks import ( PreCommitShellHook, PostCommitShellHook, CommitMsgShellHook, PostReceiveShellHook, ) from dulwich.line_ending import BlobNormalizer from dulwich.refs import ( # noqa: F401 ANNOTATED_TAG_SUFFIX, check_ref_format, RefsContainer, DictRefsContainer, InfoRefsContainer, DiskRefsContainer, read_packed_refs, read_packed_refs_with_peeled, write_packed_refs, SYMREF, ) import warnings CONTROLDIR = '.git' OBJECTDIR = 'objects' REFSDIR = 'refs' REFSDIR_TAGS = 'tags' REFSDIR_HEADS = 'heads' INDEX_FILENAME = "index" COMMONDIR = 'commondir' GITDIR = 'gitdir' WORKTREES = 'worktrees' BASE_DIRECTORIES = [ ["branches"], [REFSDIR], [REFSDIR, REFSDIR_TAGS], [REFSDIR, REFSDIR_HEADS], ["hooks"], ["info"] ] DEFAULT_REF = b'refs/heads/master' class InvalidUserIdentity(Exception): """User identity is not of the format 'user '""" def __init__(self, identity): self.identity = identity def _get_default_identity(): import getpass import socket username = getpass.getuser() try: import pwd except ImportError: fullname = None else: try: gecos = pwd.getpwnam(username).pw_gecos except KeyError: fullname = None else: fullname = gecos.split(',')[0] if not fullname: fullname = username email = os.environ.get('EMAIL') if email is None: email = "{}@{}".format(username, socket.gethostname()) return (fullname, email) def get_user_identity(config, kind=None): """Determine the identity to use for new commits. """ if kind: user = os.environ.get("GIT_" + kind + "_NAME") if user is not None: user = user.encode('utf-8') email = os.environ.get("GIT_" + kind + "_EMAIL") if email is not None: email = email.encode('utf-8') else: user = None email = None if user is None: try: user = config.get(("user", ), "name") except KeyError: user = None if email is None: try: email = config.get(("user", ), "email") except KeyError: email = None default_user, default_email = _get_default_identity() if user is None: user = default_user if not isinstance(user, bytes): user = user.encode('utf-8') if email is None: email = default_email if not isinstance(email, bytes): email = email.encode('utf-8') if email.startswith(b'<') and email.endswith(b'>'): email = email[1:-1] return (user + b" <" + email + b">") def check_user_identity(identity): """Verify that a user identity is formatted correctly. Args: identity: User identity bytestring Raises: InvalidUserIdentity: Raised when identity is invalid """ try: fst, snd = identity.split(b' <', 1) except ValueError: raise InvalidUserIdentity(identity) if b'>' not in snd: raise InvalidUserIdentity(identity) def parse_graftpoints(graftpoints): """Convert a list of graftpoints into a dict Args: graftpoints: Iterator of graftpoint lines Each line is formatted as: []* Resulting dictionary is: : [*] https://git.wiki.kernel.org/index.php/GraftPoint """ grafts = {} for l in graftpoints: raw_graft = l.split(None, 1) commit = raw_graft[0] if len(raw_graft) == 2: parents = raw_graft[1].split() else: parents = [] for sha in [commit] + parents: check_hexsha(sha, 'Invalid graftpoint') grafts[commit] = parents return grafts def serialize_graftpoints(graftpoints): """Convert a dictionary of grafts into string The graft dictionary is: : [*] Each line is formatted as: []* https://git.wiki.kernel.org/index.php/GraftPoint """ graft_lines = [] for commit, parents in graftpoints.items(): if parents: graft_lines.append(commit + b' ' + b' '.join(parents)) else: graft_lines.append(commit) return b'\n'.join(graft_lines) def _set_filesystem_hidden(path): """Mark path as to be hidden if supported by platform and filesystem. On win32 uses SetFileAttributesW api: """ if sys.platform == 'win32': import ctypes from ctypes.wintypes import BOOL, DWORD, LPCWSTR FILE_ATTRIBUTE_HIDDEN = 2 SetFileAttributesW = ctypes.WINFUNCTYPE(BOOL, LPCWSTR, DWORD)( ("SetFileAttributesW", ctypes.windll.kernel32)) if isinstance(path, bytes): path = path.decode(sys.getfilesystemencoding()) if not SetFileAttributesW(path, FILE_ATTRIBUTE_HIDDEN): pass # Could raise or log `ctypes.WinError()` here # Could implement other platform specific filesytem hiding here class BaseRepo(object): """Base class for a git repository. :ivar object_store: Dictionary-like object for accessing the objects :ivar refs: Dictionary-like object with the refs in this repository """ def __init__(self, object_store, refs): """Open a repository. This shouldn't be called directly, but rather through one of the base classes, such as MemoryRepo or Repo. Args: object_store: Object store to use refs: Refs container to use """ self.object_store = object_store self.refs = refs self._graftpoints = {} self.hooks = {} def _determine_file_mode(self): """Probe the file-system to determine whether permissions can be trusted. Returns: True if permissions can be trusted, False otherwise. """ raise NotImplementedError(self._determine_file_mode) def _init_files(self, bare): """Initialize a default set of named files.""" from dulwich.config import ConfigFile self._put_named_file('description', b"Unnamed repository") f = BytesIO() cf = ConfigFile() cf.set("core", "repositoryformatversion", "0") if self._determine_file_mode(): cf.set("core", "filemode", True) else: cf.set("core", "filemode", False) cf.set("core", "bare", bare) cf.set("core", "logallrefupdates", True) cf.write_to_file(f) self._put_named_file('config', f.getvalue()) self._put_named_file(os.path.join('info', 'exclude'), b'') def get_named_file(self, path): """Get a file from the control dir with a specific name. Although the filename should be interpreted as a filename relative to the control dir in a disk-based Repo, the object returned need not be pointing to a file in that location. Args: path: The path to the file, relative to the control dir. Returns: An open file object, or None if the file does not exist. """ raise NotImplementedError(self.get_named_file) def _put_named_file(self, path, contents): """Write a file to the control dir with the given name and contents. Args: path: The path to the file, relative to the control dir. contents: A string to write to the file. """ raise NotImplementedError(self._put_named_file) def _del_named_file(self, path): """Delete a file in the contrl directory with the given name.""" raise NotImplementedError(self._del_named_file) def open_index(self): """Open the index for this repository. Raises: NoIndexPresent: If no index is present Returns: The matching `Index` """ raise NotImplementedError(self.open_index) def fetch(self, target, determine_wants=None, progress=None, depth=None): """Fetch objects into another repository. Args: target: The target repository determine_wants: Optional function to determine what refs to fetch. progress: Optional progress function depth: Optional shallow fetch depth Returns: The local refs """ if determine_wants is None: determine_wants = target.object_store.determine_wants_all count, pack_data = self.fetch_pack_data( determine_wants, target.get_graph_walker(), progress=progress, depth=depth) target.object_store.add_pack_data(count, pack_data, progress) return self.get_refs() def fetch_pack_data(self, determine_wants, graph_walker, progress, get_tagged=None, depth=None): """Fetch the pack data required for a set of revisions. Args: determine_wants: Function that takes a dictionary with heads and returns the list of heads to fetch. graph_walker: Object that can iterate over the list of revisions to fetch and has an "ack" method that will be called to acknowledge that a revision is present. progress: Simple progress function that will be called with updated progress strings. get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags. depth: Shallow fetch depth Returns: count and iterator over pack data """ # TODO(jelmer): Fetch pack data directly, don't create objects first. objects = self.fetch_objects(determine_wants, graph_walker, progress, get_tagged, depth=depth) return pack_objects_to_data(objects) def fetch_objects(self, determine_wants, graph_walker, progress, get_tagged=None, depth=None): """Fetch the missing objects required for a set of revisions. Args: determine_wants: Function that takes a dictionary with heads and returns the list of heads to fetch. graph_walker: Object that can iterate over the list of revisions to fetch and has an "ack" method that will be called to acknowledge that a revision is present. progress: Simple progress function that will be called with updated progress strings. get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags. depth: Shallow fetch depth Returns: iterator over objects, with __len__ implemented """ if depth not in (None, 0): raise NotImplementedError("depth not supported yet") refs = {} for ref, sha in self.get_refs().items(): try: obj = self.object_store[sha] except KeyError: warnings.warn( 'ref %s points at non-present sha %s' % ( ref.decode('utf-8', 'replace'), sha.decode('ascii')), UserWarning) continue else: if isinstance(obj, Tag): refs[ref + ANNOTATED_TAG_SUFFIX] = obj.object[1] refs[ref] = sha wants = determine_wants(refs) if not isinstance(wants, list): raise TypeError("determine_wants() did not return a list") shallows = getattr(graph_walker, 'shallow', frozenset()) unshallows = getattr(graph_walker, 'unshallow', frozenset()) if wants == []: # TODO(dborowitz): find a way to short-circuit that doesn't change # this interface. if shallows or unshallows: # Do not send a pack in shallow short-circuit path return None return [] # If the graph walker is set up with an implementation that can # ACK/NAK to the wire, it will write data to the client through # this call as a side-effect. haves = self.object_store.find_common_revisions(graph_walker) # Deal with shallow requests separately because the haves do # not reflect what objects are missing if shallows or unshallows: # TODO: filter the haves commits from iter_shas. the specific # commits aren't missing. haves = [] def get_parents(commit): if commit.id in shallows: return [] return self.get_parents(commit.id, commit) return self.object_store.iter_shas( self.object_store.find_missing_objects( haves, wants, progress, get_tagged, get_parents=get_parents)) def get_graph_walker(self, heads=None): """Retrieve a graph walker. A graph walker is used by a remote repository (or proxy) to find out which objects are present in this repository. Args: heads: Repository heads to use (optional) Returns: A graph walker object """ if heads is None: heads = [ sha for sha in self.refs.as_dict(b'refs/heads').values() if sha in self.object_store] return ObjectStoreGraphWalker( heads, self.get_parents, shallow=self.get_shallow()) def get_refs(self): """Get dictionary with all refs. Returns: A ``dict`` mapping ref names to SHA1s """ return self.refs.as_dict() def head(self): """Return the SHA1 pointed at by HEAD.""" return self.refs[b'HEAD'] def _get_object(self, sha, cls): assert len(sha) in (20, 40) ret = self.get_object(sha) if not isinstance(ret, cls): if cls is Commit: raise NotCommitError(ret) elif cls is Blob: raise NotBlobError(ret) elif cls is Tree: raise NotTreeError(ret) elif cls is Tag: raise NotTagError(ret) else: raise Exception("Type invalid: %r != %r" % ( ret.type_name, cls.type_name)) return ret def get_object(self, sha): """Retrieve the object with the specified SHA. Args: sha: SHA to retrieve Returns: A ShaFile object Raises: KeyError: when the object can not be found """ return self.object_store[sha] def get_parents(self, sha, commit=None): """Retrieve the parents of a specific commit. If the specific commit is a graftpoint, the graft parents will be returned instead. Args: sha: SHA of the commit for which to retrieve the parents commit: Optional commit matching the sha Returns: List of parents """ try: return self._graftpoints[sha] except KeyError: if commit is None: commit = self[sha] return commit.parents def get_config(self): """Retrieve the config object. Returns: `ConfigFile` object for the ``.git/config`` file. """ raise NotImplementedError(self.get_config) def get_description(self): """Retrieve the description for this repository. Returns: String with the description of the repository as set by the user. """ raise NotImplementedError(self.get_description) def set_description(self, description): """Set the description for this repository. Args: description: Text to set as description for this repository. """ raise NotImplementedError(self.set_description) def get_config_stack(self): """Return a config stack for this repository. This stack accesses the configuration for both this repository itself (.git/config) and the global configuration, which usually lives in ~/.gitconfig. Returns: `Config` instance for this repository """ from dulwich.config import StackedConfig backends = [self.get_config()] + StackedConfig.default_backends() return StackedConfig(backends, writable=backends[0]) def get_shallow(self): """Get the set of shallow commits. Returns: Set of shallow commits. """ f = self.get_named_file('shallow') if f is None: return set() with f: return set(l.strip() for l in f) def update_shallow(self, new_shallow, new_unshallow): """Update the list of shallow objects. Args: new_shallow: Newly shallow objects new_unshallow: Newly no longer shallow objects """ shallow = self.get_shallow() if new_shallow: shallow.update(new_shallow) if new_unshallow: shallow.difference_update(new_unshallow) self._put_named_file( 'shallow', b''.join([sha + b'\n' for sha in shallow])) def get_peeled(self, ref): """Get the peeled value of a ref. Args: ref: The refname to peel. Returns: The fully-peeled SHA1 of a tag object, after peeling all intermediate tags; if the original ref does not point to a tag, this will equal the original SHA1. """ cached = self.refs.get_peeled(ref) if cached is not None: return cached return self.object_store.peel_sha(self.refs[ref]).id def get_walker(self, include=None, *args, **kwargs): """Obtain a walker for this repository. Args: include: Iterable of SHAs of commits to include along with their ancestors. Defaults to [HEAD] exclude: Iterable of SHAs of commits to exclude along with their ancestors, overriding includes. order: ORDER_* constant specifying the order of results. Anything other than ORDER_DATE may result in O(n) memory usage. reverse: If True, reverse the order of output, requiring O(n) memory. max_entries: The maximum number of entries to yield, or None for no limit. paths: Iterable of file or subtree paths to show entries for. rename_detector: diff.RenameDetector object for detecting renames. follow: If True, follow path across renames/copies. Forces a default rename_detector. since: Timestamp to list commits after. until: Timestamp to list commits before. queue_cls: A class to use for a queue of commits, supporting the iterator protocol. The constructor takes a single argument, the Walker. Returns: A `Walker` object """ from dulwich.walk import Walker if include is None: include = [self.head()] if isinstance(include, str): include = [include] kwargs['get_parents'] = lambda commit: self.get_parents( commit.id, commit) return Walker(self.object_store, include, *args, **kwargs) def __getitem__(self, name): """Retrieve a Git object by SHA1 or ref. Args: name: A Git object SHA1 or a ref name Returns: A `ShaFile` object, such as a Commit or Blob Raises: KeyError: when the specified ref or object does not exist """ if not isinstance(name, bytes): raise TypeError("'name' must be bytestring, not %.80s" % type(name).__name__) if len(name) in (20, 40): try: return self.object_store[name] except (KeyError, ValueError): pass try: return self.object_store[self.refs[name]] except RefFormatError: raise KeyError(name) def __contains__(self, name): """Check if a specific Git object or ref is present. Args: name: Git object SHA1 or ref name """ if len(name) in (20, 40): return name in self.object_store or name in self.refs else: return name in self.refs def __setitem__(self, name, value): """Set a ref. Args: name: ref name value: Ref value - either a ShaFile object, or a hex sha """ if name.startswith(b"refs/") or name == b'HEAD': if isinstance(value, ShaFile): self.refs[name] = value.id elif isinstance(value, bytes): self.refs[name] = value else: raise TypeError(value) else: raise ValueError(name) def __delitem__(self, name): """Remove a ref. Args: name: Name of the ref to remove """ if name.startswith(b"refs/") or name == b"HEAD": del self.refs[name] else: raise ValueError(name) def _get_user_identity(self, config, kind=None): """Determine the identity to use for new commits. """ # TODO(jelmer): Deprecate this function in favor of get_user_identity return get_user_identity(config) def _add_graftpoints(self, updated_graftpoints): """Add or modify graftpoints Args: updated_graftpoints: Dict of commit shas to list of parent shas """ # Simple validation for commit, parents in updated_graftpoints.items(): for sha in [commit] + parents: check_hexsha(sha, 'Invalid graftpoint') self._graftpoints.update(updated_graftpoints) def _remove_graftpoints(self, to_remove=[]): """Remove graftpoints Args: to_remove: List of commit shas """ for sha in to_remove: del self._graftpoints[sha] def _read_heads(self, name): f = self.get_named_file(name) if f is None: return [] with f: return [l.strip() for l in f.readlines() if l.strip()] def do_commit(self, message=None, committer=None, author=None, commit_timestamp=None, commit_timezone=None, author_timestamp=None, author_timezone=None, tree=None, encoding=None, ref=b'HEAD', merge_heads=None): """Create a new commit. Args: message: Commit message committer: Committer fullname author: Author fullname (defaults to committer) commit_timestamp: Commit timestamp (defaults to now) commit_timezone: Commit timestamp timezone (defaults to GMT) author_timestamp: Author timestamp (defaults to commit timestamp) author_timezone: Author timestamp timezone (defaults to commit timestamp timezone) tree: SHA1 of the tree root to use (if not specified the current index will be committed). encoding: Encoding ref: Optional ref to commit to (defaults to current branch) merge_heads: Merge heads (defaults to .git/MERGE_HEADS) Returns: New commit SHA1 """ import time c = Commit() if tree is None: index = self.open_index() c.tree = index.commit(self.object_store) else: if len(tree) != 40: raise ValueError("tree must be a 40-byte hex sha string") c.tree = tree try: self.hooks['pre-commit'].execute() except HookError as e: raise CommitError(e) except KeyError: # no hook defined, silent fallthrough pass config = self.get_config_stack() if merge_heads is None: merge_heads = self._read_heads('MERGE_HEADS') if committer is None: committer = get_user_identity(config, kind='COMMITTER') check_user_identity(committer) c.committer = committer if commit_timestamp is None: # FIXME: Support GIT_COMMITTER_DATE environment variable commit_timestamp = time.time() c.commit_time = int(commit_timestamp) if commit_timezone is None: # FIXME: Use current user timezone rather than UTC commit_timezone = 0 c.commit_timezone = commit_timezone if author is None: author = get_user_identity(config, kind='AUTHOR') c.author = author check_user_identity(author) if author_timestamp is None: # FIXME: Support GIT_AUTHOR_DATE environment variable author_timestamp = commit_timestamp c.author_time = int(author_timestamp) if author_timezone is None: author_timezone = commit_timezone c.author_timezone = author_timezone if encoding is None: try: encoding = config.get(('i18n', ), 'commitEncoding') except KeyError: pass # No dice if encoding is not None: c.encoding = encoding if message is None: # FIXME: Try to read commit message from .git/MERGE_MSG raise ValueError("No commit message specified") try: c.message = self.hooks['commit-msg'].execute(message) if c.message is None: c.message = message except HookError as e: raise CommitError(e) except KeyError: # no hook defined, message not modified c.message = message if ref is None: # Create a dangling commit c.parents = merge_heads self.object_store.add_object(c) else: try: old_head = self.refs[ref] c.parents = [old_head] + merge_heads self.object_store.add_object(c) ok = self.refs.set_if_equals( ref, old_head, c.id, message=b"commit: " + message, committer=committer, timestamp=commit_timestamp, timezone=commit_timezone) except KeyError: c.parents = merge_heads self.object_store.add_object(c) ok = self.refs.add_if_new( ref, c.id, message=b"commit: " + message, committer=committer, timestamp=commit_timestamp, timezone=commit_timezone) if not ok: # Fail if the atomic compare-and-swap failed, leaving the # commit and all its objects as garbage. raise CommitError("%s changed during commit" % (ref,)) self._del_named_file('MERGE_HEADS') try: self.hooks['post-commit'].execute() except HookError as e: # silent failure warnings.warn("post-commit hook failed: %s" % e, UserWarning) except KeyError: # no hook defined, silent fallthrough pass return c.id def read_gitfile(f): """Read a ``.git`` file. The first line of the file should start with "gitdir: " Args: f: File-like object to read from Returns: A path """ cs = f.read() if not cs.startswith("gitdir: "): raise ValueError("Expected file to start with 'gitdir: '") return cs[len("gitdir: "):].rstrip("\n") class Repo(BaseRepo): """A git repository backed by local disk. To open an existing repository, call the contructor with the path of the repository. To create a new repository, use the Repo.init class method. """ def __init__(self, root): hidden_path = os.path.join(root, CONTROLDIR) if os.path.isdir(os.path.join(hidden_path, OBJECTDIR)): self.bare = False self._controldir = hidden_path elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and os.path.isdir(os.path.join(root, REFSDIR))): self.bare = True self._controldir = root elif os.path.isfile(hidden_path): self.bare = False with open(hidden_path, 'r') as f: path = read_gitfile(f) self.bare = False self._controldir = os.path.join(root, path) else: raise NotGitRepository( "No git repository was found at %(path)s" % dict(path=root) ) commondir = self.get_named_file(COMMONDIR) if commondir is not None: with commondir: self._commondir = os.path.join( self.controldir(), commondir.read().rstrip(b"\r\n").decode( sys.getfilesystemencoding())) else: self._commondir = self._controldir self.path = root config = self.get_config() object_store = DiskObjectStore.from_config( os.path.join(self.commondir(), OBJECTDIR), config) refs = DiskRefsContainer(self.commondir(), self._controldir, logger=self._write_reflog) BaseRepo.__init__(self, object_store, refs) self._graftpoints = {} graft_file = self.get_named_file(os.path.join("info", "grafts"), basedir=self.commondir()) if graft_file: with graft_file: self._graftpoints.update(parse_graftpoints(graft_file)) graft_file = self.get_named_file("shallow", basedir=self.commondir()) if graft_file: with graft_file: self._graftpoints.update(parse_graftpoints(graft_file)) self.hooks['pre-commit'] = PreCommitShellHook(self.controldir()) self.hooks['commit-msg'] = CommitMsgShellHook(self.controldir()) self.hooks['post-commit'] = PostCommitShellHook(self.controldir()) self.hooks['post-receive'] = PostReceiveShellHook(self.controldir()) def _write_reflog(self, ref, old_sha, new_sha, committer, timestamp, timezone, message): from .reflog import format_reflog_line path = os.path.join( self.controldir(), 'logs', ref.decode(sys.getfilesystemencoding())) try: os.makedirs(os.path.dirname(path)) except OSError as e: if e.errno != errno.EEXIST: raise if committer is None: config = self.get_config_stack() committer = self._get_user_identity(config) check_user_identity(committer) if timestamp is None: timestamp = int(time.time()) if timezone is None: timezone = 0 # FIXME with open(path, 'ab') as f: f.write(format_reflog_line(old_sha, new_sha, committer, timestamp, timezone, message) + b'\n') @classmethod def discover(cls, start='.'): """Iterate parent directories to discover a repository Return a Repo object for the first parent directory that looks like a Git repository. Args: start: The directory to start discovery from (defaults to '.') """ remaining = True path = os.path.abspath(start) while remaining: try: return cls(path) except NotGitRepository: path, remaining = os.path.split(path) raise NotGitRepository( "No git repository was found at %(path)s" % dict(path=start) ) def controldir(self): """Return the path of the control directory.""" return self._controldir def commondir(self): """Return the path of the common directory. For a main working tree, it is identical to controldir(). For a linked working tree, it is the control directory of the main working tree.""" return self._commondir def _determine_file_mode(self): """Probe the file-system to determine whether permissions can be trusted. Returns: True if permissions can be trusted, False otherwise. """ fname = os.path.join(self.path, '.probe-permissions') with open(fname, 'w') as f: f.write('') st1 = os.lstat(fname) try: os.chmod(fname, st1.st_mode ^ stat.S_IXUSR) except EnvironmentError as e: if e.errno == errno.EPERM: return False raise st2 = os.lstat(fname) os.unlink(fname) mode_differs = st1.st_mode != st2.st_mode st2_has_exec = (st2.st_mode & stat.S_IXUSR) != 0 return mode_differs and st2_has_exec def _put_named_file(self, path, contents): """Write a file to the control dir with the given name and contents. Args: path: The path to the file, relative to the control dir. contents: A string to write to the file. """ path = path.lstrip(os.path.sep) with GitFile(os.path.join(self.controldir(), path), 'wb') as f: f.write(contents) def _del_named_file(self, path): try: os.unlink(os.path.join(self.controldir(), path)) except (IOError, OSError) as e: if e.errno == errno.ENOENT: return raise def get_named_file(self, path, basedir=None): """Get a file from the control dir with a specific name. Although the filename should be interpreted as a filename relative to the control dir in a disk-based Repo, the object returned need not be pointing to a file in that location. Args: path: The path to the file, relative to the control dir. basedir: Optional argument that specifies an alternative to the control dir. Returns: An open file object, or None if the file does not exist. """ # TODO(dborowitz): sanitize filenames, since this is used directly by # the dumb web serving code. if basedir is None: basedir = self.controldir() path = path.lstrip(os.path.sep) try: return open(os.path.join(basedir, path), 'rb') except (IOError, OSError) as e: if e.errno == errno.ENOENT: return None raise def index_path(self): """Return path to the index file.""" return os.path.join(self.controldir(), INDEX_FILENAME) def open_index(self): """Open the index for this repository. Raises: NoIndexPresent: If no index is present Returns: The matching `Index` """ from dulwich.index import Index if not self.has_index(): raise NoIndexPresent() return Index(self.index_path()) def has_index(self): """Check if an index is present.""" # Bare repos must never have index files; non-bare repos may have a # missing index file, which is treated as empty. return not self.bare def stage(self, fs_paths): """Stage a set of paths. Args: fs_paths: List of paths, relative to the repository path """ root_path_bytes = self.path.encode(sys.getfilesystemencoding()) if not isinstance(fs_paths, list): fs_paths = [fs_paths] from dulwich.index import ( blob_from_path_and_stat, index_entry_from_stat, _fs_to_tree_path, ) index = self.open_index() blob_normalizer = self.get_blob_normalizer() for fs_path in fs_paths: if not isinstance(fs_path, bytes): fs_path = fs_path.encode(sys.getfilesystemencoding()) if os.path.isabs(fs_path): raise ValueError( "path %r should be relative to " "repository root, not absolute" % fs_path) tree_path = _fs_to_tree_path(fs_path) full_path = os.path.join(root_path_bytes, fs_path) try: st = os.lstat(full_path) except OSError: # File no longer exists try: del index[tree_path] except KeyError: pass # already removed else: - if stat.S_ISDIR(st.st_mode) or stat.S_ISBLK(st.st_mode): + if (not stat.S_ISREG(st.st_mode) and + not stat.S_ISLNK(st.st_mode)): try: del index[tree_path] except KeyError: pass else: blob = blob_from_path_and_stat(full_path, st) blob = blob_normalizer.checkin_normalize(blob, fs_path) self.object_store.add_object(blob) index[tree_path] = index_entry_from_stat(st, blob.id, 0) index.write() def clone(self, target_path, mkdir=True, bare=False, origin=b"origin", checkout=None): """Clone this repository. Args: target_path: Target path mkdir: Create the target directory bare: Whether to create a bare repository origin: Base name for refs in target repository cloned from this repository Returns: Created repository as `Repo` """ if not bare: target = self.init(target_path, mkdir=mkdir) else: if checkout: raise ValueError("checkout and bare are incompatible") target = self.init_bare(target_path, mkdir=mkdir) self.fetch(target) encoded_path = self.path if not isinstance(encoded_path, bytes): encoded_path = encoded_path.encode(sys.getfilesystemencoding()) ref_message = b"clone: from " + encoded_path target.refs.import_refs( b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'), message=ref_message) target.refs.import_refs( b'refs/tags', self.refs.as_dict(b'refs/tags'), message=ref_message) try: target.refs.add_if_new( DEFAULT_REF, self.refs[DEFAULT_REF], message=ref_message) except KeyError: pass target_config = target.get_config() target_config.set(('remote', 'origin'), 'url', encoded_path) target_config.set(('remote', 'origin'), 'fetch', '+refs/heads/*:refs/remotes/origin/*') target_config.write_to_path() # Update target head head_chain, head_sha = self.refs.follow(b'HEAD') if head_chain and head_sha is not None: target.refs.set_symbolic_ref(b'HEAD', head_chain[-1], message=ref_message) target[b'HEAD'] = head_sha if checkout is None: checkout = (not bare) if checkout: # Checkout HEAD to target dir target.reset_index() return target def reset_index(self, tree=None): """Reset the index back to a specific tree. Args: tree: Tree SHA to reset to, None for current HEAD tree. """ from dulwich.index import ( build_index_from_tree, validate_path_element_default, validate_path_element_ntfs, ) if tree is None: tree = self[b'HEAD'].tree config = self.get_config() honor_filemode = config.get_boolean( b'core', b'filemode', os.name != "nt") if config.get_boolean(b'core', b'core.protectNTFS', os.name == "nt"): validate_path_element = validate_path_element_ntfs else: validate_path_element = validate_path_element_default return build_index_from_tree( self.path, self.index_path(), self.object_store, tree, honor_filemode=honor_filemode, validate_path_element=validate_path_element) def get_config(self): """Retrieve the config object. Returns: `ConfigFile` object for the ``.git/config`` file. """ from dulwich.config import ConfigFile path = os.path.join(self._controldir, 'config') try: return ConfigFile.from_path(path) except (IOError, OSError) as e: if e.errno != errno.ENOENT: raise ret = ConfigFile() ret.path = path return ret def get_description(self): """Retrieve the description of this repository. Returns: A string describing the repository or None. """ path = os.path.join(self._controldir, 'description') try: with GitFile(path, 'rb') as f: return f.read() except (IOError, OSError) as e: if e.errno != errno.ENOENT: raise return None def __repr__(self): return "" % self.path def set_description(self, description): """Set the description for this repository. Args: description: Text to set as description for this repository. """ self._put_named_file('description', description) @classmethod def _init_maybe_bare(cls, path, bare): for d in BASE_DIRECTORIES: os.mkdir(os.path.join(path, *d)) DiskObjectStore.init(os.path.join(path, OBJECTDIR)) ret = cls(path) ret.refs.set_symbolic_ref(b'HEAD', DEFAULT_REF) ret._init_files(bare) return ret @classmethod def init(cls, path, mkdir=False): """Create a new repository. Args: path: Path in which to create the repository mkdir: Whether to create the directory Returns: `Repo` instance """ if mkdir: os.mkdir(path) controldir = os.path.join(path, CONTROLDIR) os.mkdir(controldir) _set_filesystem_hidden(controldir) cls._init_maybe_bare(controldir, False) return cls(path) @classmethod def _init_new_working_directory(cls, path, main_repo, identifier=None, mkdir=False): """Create a new working directory linked to a repository. Args: path: Path in which to create the working tree. main_repo: Main repository to reference identifier: Worktree identifier mkdir: Whether to create the directory Returns: `Repo` instance """ if mkdir: os.mkdir(path) if identifier is None: identifier = os.path.basename(path) main_worktreesdir = os.path.join(main_repo.controldir(), WORKTREES) worktree_controldir = os.path.join(main_worktreesdir, identifier) gitdirfile = os.path.join(path, CONTROLDIR) with open(gitdirfile, 'wb') as f: f.write(b'gitdir: ' + worktree_controldir.encode(sys.getfilesystemencoding()) + b'\n') try: os.mkdir(main_worktreesdir) except OSError as e: if e.errno != errno.EEXIST: raise try: os.mkdir(worktree_controldir) except OSError as e: if e.errno != errno.EEXIST: raise with open(os.path.join(worktree_controldir, GITDIR), 'wb') as f: f.write(gitdirfile.encode(sys.getfilesystemencoding()) + b'\n') with open(os.path.join(worktree_controldir, COMMONDIR), 'wb') as f: f.write(b'../..\n') with open(os.path.join(worktree_controldir, 'HEAD'), 'wb') as f: f.write(main_repo.head() + b'\n') r = cls(path) r.reset_index() return r @classmethod def init_bare(cls, path, mkdir=False): """Create a new bare repository. ``path`` should already exist and be an empty directory. Args: path: Path to create bare repository in Returns: a `Repo` instance """ if mkdir: os.mkdir(path) return cls._init_maybe_bare(path, True) create = init_bare def close(self): """Close any files opened by this repository.""" self.object_store.close() def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() def get_blob_normalizer(self): """ Return a BlobNormalizer object """ # TODO Parse the git attributes files git_attributes = {} return BlobNormalizer( self.get_config_stack(), git_attributes ) class MemoryRepo(BaseRepo): """Repo that stores refs, objects, and named files in memory. MemoryRepos are always bare: they have no working tree and no index, since those have a stronger dependency on the filesystem. """ def __init__(self): from dulwich.config import ConfigFile self._reflog = [] refs_container = DictRefsContainer({}, logger=self._append_reflog) BaseRepo.__init__(self, MemoryObjectStore(), refs_container) self._named_files = {} self.bare = True self._config = ConfigFile() self._description = None def _append_reflog(self, *args): self._reflog.append(args) def set_description(self, description): self._description = description def get_description(self): return self._description def _determine_file_mode(self): """Probe the file-system to determine whether permissions can be trusted. Returns: True if permissions can be trusted, False otherwise. """ return sys.platform != 'win32' def _put_named_file(self, path, contents): """Write a file to the control dir with the given name and contents. Args: path: The path to the file, relative to the control dir. contents: A string to write to the file. """ self._named_files[path] = contents def _del_named_file(self, path): try: del self._named_files[path] except KeyError: pass def get_named_file(self, path, basedir=None): """Get a file from the control dir with a specific name. Although the filename should be interpreted as a filename relative to the control dir in a disk-baked Repo, the object returned need not be pointing to a file in that location. Args: path: The path to the file, relative to the control dir. Returns: An open file object, or None if the file does not exist. """ contents = self._named_files.get(path, None) if contents is None: return None return BytesIO(contents) def open_index(self): """Fail to open index for this repo, since it is bare. Raises: NoIndexPresent: Raised when no index is present """ raise NoIndexPresent() def get_config(self): """Retrieve the config object. Returns: `ConfigFile` object. """ return self._config @classmethod def init_bare(cls, objects, refs): """Create a new bare repository in memory. Args: objects: Objects for the new repository, as iterable refs: Refs as dictionary, mapping names to object SHA1s """ ret = cls() for obj in objects: ret.object_store.add_object(obj) for refname, sha in refs.items(): ret.refs.add_if_new(refname, sha) ret._init_files(bare=True) return ret