diff --git a/dulwich/server.py b/dulwich/server.py index 7a4e1859..ec6cfcfe 100644 --- a/dulwich/server.py +++ b/dulwich/server.py @@ -1,1219 +1,1220 @@ # server.py -- Implementation of the server side git protocols # Copyright (C) 2008 John Carr # Coprygith (C) 2011-2012 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Git smart network protocol server implementation. For more detailed implementation on the network protocol, see the Documentation/technical directory in the cgit distribution, and in particular: * Documentation/technical/protocol-capabilities.txt * Documentation/technical/pack-protocol.txt Currently supported capabilities: * include-tag * thin-pack * multi_ack_detailed * multi_ack * side-band-64k * ofs-delta * no-progress * report-status * delete-refs * shallow * symref """ import collections import os import socket import sys import time from typing import List, Tuple, Dict, Optional, Iterable import zlib import socketserver from dulwich.archive import tar_stream from dulwich.errors import ( ApplyDeltaError, ChecksumMismatch, GitProtocolError, HookError, NotGitRepository, UnexpectedCommandError, ObjectFormatException, ) from dulwich import log_utils from dulwich.objects import ( Commit, valid_hexsha, ) from dulwich.pack import ( write_pack_objects, ) from dulwich.protocol import ( # noqa: F401 BufferedPktLineWriter, capability_agent, CAPABILITIES_REF, CAPABILITY_AGENT, CAPABILITY_DELETE_REFS, CAPABILITY_INCLUDE_TAG, CAPABILITY_MULTI_ACK_DETAILED, CAPABILITY_MULTI_ACK, CAPABILITY_NO_DONE, CAPABILITY_NO_PROGRESS, CAPABILITY_OFS_DELTA, CAPABILITY_QUIET, CAPABILITY_REPORT_STATUS, CAPABILITY_SHALLOW, CAPABILITY_SIDE_BAND_64K, CAPABILITY_THIN_PACK, COMMAND_DEEPEN, COMMAND_DONE, COMMAND_HAVE, COMMAND_SHALLOW, COMMAND_UNSHALLOW, COMMAND_WANT, MULTI_ACK, MULTI_ACK_DETAILED, Protocol, ProtocolFile, ReceivableProtocol, SIDE_BAND_CHANNEL_DATA, SIDE_BAND_CHANNEL_PROGRESS, SIDE_BAND_CHANNEL_FATAL, SINGLE_ACK, TCP_GIT_PORT, ZERO_SHA, ack_type, extract_capabilities, extract_want_line_capabilities, symref_capabilities, ) from dulwich.refs import ( ANNOTATED_TAG_SUFFIX, write_info_refs, ) from dulwich.repo import ( BaseRepo, Repo, ) logger = log_utils.getLogger(__name__) class Backend(object): """A backend for the Git smart server implementation.""" def open_repository(self, path): """Open the repository at a path. Args: path: Path to the repository Raises: NotGitRepository: no git repository was found at path Returns: Instance of BackendRepo """ raise NotImplementedError(self.open_repository) class BackendRepo(object): """Repository abstraction used by the Git server. The methods required here are a subset of those provided by dulwich.repo.Repo. """ object_store = None refs = None def get_refs(self) -> Dict[bytes, bytes]: """ Get all the refs in the repository Returns: dict of name -> sha """ raise NotImplementedError def get_peeled(self, name: bytes) -> Optional[bytes]: """Return the cached peeled value of a ref, if available. Args: name: Name of the ref to peel Returns: The peeled value of the ref. If the ref is known not point to a tag, this will be the SHA the ref refers to. If no cached information about a tag is available, this method may return None, but it should attempt to peel the tag if possible. """ return None def fetch_objects(self, determine_wants, graph_walker, progress, get_tagged=None): """ Yield the objects required for a list of commits. Args: progress: is a callback to send progress messages to the client get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags. """ raise NotImplementedError class DictBackend(Backend): """Trivial backend that looks up Git repositories in a dictionary.""" def __init__(self, repos): self.repos = repos def open_repository(self, path: str) -> BaseRepo: logger.debug('Opening repository at %s', path) try: return self.repos[path] except KeyError: raise NotGitRepository( "No git repository was found at %(path)s" % dict(path=path) ) class FileSystemBackend(Backend): """Simple backend looking up Git repositories in the local file system.""" def __init__(self, root=os.sep): super(FileSystemBackend, self).__init__() self.root = (os.path.abspath(root) + os.sep).replace( os.sep * 2, os.sep) def open_repository(self, path): logger.debug('opening repository at %s', path) abspath = os.path.abspath(os.path.join(self.root, path)) + os.sep normcase_abspath = os.path.normcase(abspath) normcase_root = os.path.normcase(self.root) if not normcase_abspath.startswith(normcase_root): raise NotGitRepository( "Path %r not inside root %r" % (path, self.root)) return Repo(abspath) class Handler(object): """Smart protocol command handler base class.""" - def __init__(self, backend, proto, http_req=None): + def __init__(self, backend, proto, stateless_rpc=None): self.backend = backend self.proto = proto - self.http_req = http_req + self.stateless_rpc = stateless_rpc def handle(self): raise NotImplementedError(self.handle) class PackHandler(Handler): """Protocol handler for packs.""" - def __init__(self, backend, proto, http_req=None): - super(PackHandler, self).__init__(backend, proto, http_req) + def __init__(self, backend, proto, stateless_rpc=None): + super(PackHandler, self).__init__(backend, proto, stateless_rpc) self._client_capabilities = None # Flags needed for the no-done capability self._done_received = False @classmethod def capability_line(cls, capabilities): logger.info('Sending capabilities: %s', capabilities) return b"".join([b" " + c for c in capabilities]) @classmethod def capabilities(cls) -> Iterable[bytes]: raise NotImplementedError(cls.capabilities) @classmethod def innocuous_capabilities(cls) -> Iterable[bytes]: return [CAPABILITY_INCLUDE_TAG, CAPABILITY_THIN_PACK, CAPABILITY_NO_PROGRESS, CAPABILITY_OFS_DELTA, capability_agent()] @classmethod def required_capabilities(cls) -> Iterable[bytes]: """Return a list of capabilities that we require the client to have.""" return [] def set_client_capabilities(self, caps: Iterable[bytes]) -> None: allowable_caps = set(self.innocuous_capabilities()) allowable_caps.update(self.capabilities()) for cap in caps: if cap.startswith(CAPABILITY_AGENT + b'='): continue if cap not in allowable_caps: raise GitProtocolError('Client asked for capability %r that ' 'was not advertised.' % cap) for cap in self.required_capabilities(): if cap not in caps: raise GitProtocolError('Client does not support required ' 'capability %r.' % cap) self._client_capabilities = set(caps) logger.info('Client capabilities: %s', caps) def has_capability(self, cap: bytes) -> bool: if self._client_capabilities is None: raise GitProtocolError('Server attempted to access capability %r ' 'before asking client' % cap) return cap in self._client_capabilities def notify_done(self) -> None: self._done_received = True class UploadPackHandler(PackHandler): """Protocol handler for uploading a pack to the client.""" - def __init__(self, backend, args, proto, http_req=None, + def __init__(self, backend, args, proto, stateless_rpc=None, advertise_refs=False): super(UploadPackHandler, self).__init__( - backend, proto, http_req=http_req) + backend, proto, stateless_rpc=stateless_rpc) self.repo = backend.open_repository(args[0]) self._graph_walker = None self.advertise_refs = advertise_refs # A state variable for denoting that the have list is still # being processed, and the client is not accepting any other # data (such as side-band, see the progress method here). self._processing_have_lines = False @classmethod def capabilities(cls): return [CAPABILITY_MULTI_ACK_DETAILED, CAPABILITY_MULTI_ACK, CAPABILITY_SIDE_BAND_64K, CAPABILITY_THIN_PACK, CAPABILITY_OFS_DELTA, CAPABILITY_NO_PROGRESS, CAPABILITY_INCLUDE_TAG, CAPABILITY_SHALLOW, CAPABILITY_NO_DONE] @classmethod def required_capabilities(cls): return (CAPABILITY_SIDE_BAND_64K, CAPABILITY_THIN_PACK, CAPABILITY_OFS_DELTA) def progress(self, message): if (self.has_capability(CAPABILITY_NO_PROGRESS) or self._processing_have_lines): return self.proto.write_sideband(SIDE_BAND_CHANNEL_PROGRESS, message) def get_tagged(self, refs=None, repo=None): """Get a dict of peeled values of tags to their original tag shas. Args: refs: dict of refname -> sha of possible tags; defaults to all of the backend's refs. repo: optional Repo instance for getting peeled refs; defaults to the backend's repo, if available Returns: dict of peeled_sha -> tag_sha, where tag_sha is the sha of a tag whose peeled value is peeled_sha. """ if not self.has_capability(CAPABILITY_INCLUDE_TAG): return {} if refs is None: refs = self.repo.get_refs() if repo is None: repo = getattr(self.repo, "repo", None) if repo is None: # Bail if we don't have a Repo available; this is ok since # clients must be able to handle if the server doesn't include # all relevant tags. # TODO: fix behavior when missing return {} # TODO(jelmer): Integrate this with the refs logic in # Repo.fetch_objects tagged = {} for name, sha in refs.items(): peeled_sha = repo.get_peeled(name) if peeled_sha != sha: tagged[peeled_sha] = sha return tagged def handle(self): def write(x): return self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x) graph_walker = _ProtocolGraphWalker( self, self.repo.object_store, self.repo.get_peeled, self.repo.refs.get_symrefs) objects_iter = self.repo.fetch_objects( graph_walker.determine_wants, graph_walker, self.progress, get_tagged=self.get_tagged) # Note the fact that client is only processing responses related # to the have lines it sent, and any other data (including side- # band) will be be considered a fatal error. self._processing_have_lines = True # Did the process short-circuit (e.g. in a stateless RPC call)? Note # that the client still expects a 0-object pack in most cases. # Also, if it also happens that the object_iter is instantiated # with a graph walker with an implementation that talks over the # wire (which is this instance of this class) this will actually # iterate through everything and write things out to the wire. if len(objects_iter) == 0: return # The provided haves are processed, and it is safe to send side- # band data now. self._processing_have_lines = False if not graph_walker.handle_done( not self.has_capability(CAPABILITY_NO_DONE), self._done_received): return self.progress( ("counting objects: %d, done.\n" % len(objects_iter)).encode( 'ascii')) write_pack_objects(ProtocolFile(None, write), objects_iter) # we are done self.proto.write_pkt_line(None) def _split_proto_line(line, allowed): """Split a line read from the wire. Args: line: The line read from the wire. allowed: An iterable of command names that should be allowed. Command names not listed below as possible return values will be ignored. If None, any commands from the possible return values are allowed. Returns: a tuple having one of the following forms: ('want', obj_id) ('have', obj_id) ('done', None) (None, None) (for a flush-pkt) Raises: UnexpectedCommandError: if the line cannot be parsed into one of the allowed return values. """ if not line: fields = [None] else: fields = line.rstrip(b'\n').split(b' ', 1) command = fields[0] if allowed is not None and command not in allowed: raise UnexpectedCommandError(command) if len(fields) == 1 and command in (COMMAND_DONE, None): return (command, None) elif len(fields) == 2: if command in (COMMAND_WANT, COMMAND_HAVE, COMMAND_SHALLOW, COMMAND_UNSHALLOW): if not valid_hexsha(fields[1]): raise GitProtocolError("Invalid sha") return tuple(fields) elif command == COMMAND_DEEPEN: return command, int(fields[1]) raise GitProtocolError('Received invalid line from client: %r' % line) def _find_shallow(store, heads, depth): """Find shallow commits according to a given depth. Args: store: An ObjectStore for looking up objects. heads: Iterable of head SHAs to start walking from. depth: The depth of ancestors to include. A depth of one includes only the heads themselves. Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be considered shallow and unshallow according to the arguments. Note that these sets may overlap if a commit is reachable along multiple paths. """ parents = {} def get_parents(sha): result = parents.get(sha, None) if not result: result = store[sha].parents parents[sha] = result return result todo = [] # stack of (sha, depth) for head_sha in heads: obj = store.peel_sha(head_sha) if isinstance(obj, Commit): todo.append((obj.id, 1)) not_shallow = set() shallow = set() while todo: sha, cur_depth = todo.pop() if cur_depth < depth: not_shallow.add(sha) new_depth = cur_depth + 1 todo.extend((p, new_depth) for p in get_parents(sha)) else: shallow.add(sha) return shallow, not_shallow def _want_satisfied(store, haves, want, earliest): o = store[want] pending = collections.deque([o]) known = set([want]) while pending: commit = pending.popleft() if commit.id in haves: return True if commit.type_name != b"commit": # non-commit wants are assumed to be satisfied continue for parent in commit.parents: if parent in known: continue known.add(parent) parent_obj = store[parent] # TODO: handle parents with later commit times than children if parent_obj.commit_time >= earliest: pending.append(parent_obj) return False def _all_wants_satisfied(store, haves, wants): """Check whether all the current wants are satisfied by a set of haves. Args: store: Object store to retrieve objects from haves: A set of commits we know the client has. wants: A set of commits the client wants Note: Wants are specified with set_wants rather than passed in since in the current interface they are determined outside this class. """ haves = set(haves) if haves: earliest = min([store[h].commit_time for h in haves]) else: earliest = 0 for want in wants: if not _want_satisfied(store, haves, want, earliest): return False return True class _ProtocolGraphWalker(object): """A graph walker that knows the git protocol. As a graph walker, this class implements ack(), next(), and reset(). It also contains some base methods for interacting with the wire and walking the commit tree. The work of determining which acks to send is passed on to the implementation instance stored in _impl. The reason for this is that we do not know at object creation time what ack level the protocol requires. A call to set_ack_type() is required to set up the implementation, before any calls to next() or ack() are made. """ def __init__(self, handler, object_store, get_peeled, get_symrefs): self.handler = handler self.store = object_store self.get_peeled = get_peeled self.get_symrefs = get_symrefs self.proto = handler.proto - self.http_req = handler.http_req + self.stateless_rpc = handler.stateless_rpc self.advertise_refs = handler.advertise_refs self._wants = [] self.shallow = set() self.client_shallow = set() self.unshallow = set() self._cached = False self._cache = [] self._cache_index = 0 self._impl = None def determine_wants(self, heads): """Determine the wants for a set of heads. The given heads are advertised to the client, who then specifies which refs he wants using 'want' lines. This portion of the protocol is the same regardless of ack type, and in fact is used to set the ack type of the ProtocolGraphWalker. If the client has the 'shallow' capability, this method also reads and responds to the 'shallow' and 'deepen' lines from the client. These are not part of the wants per se, but they set up necessary state for walking the graph. Additionally, later code depends on this method consuming everything up to the first 'have' line. Args: heads: a dict of refname->SHA1 to advertise Returns: a list of SHA1s requested by the client """ symrefs = self.get_symrefs() values = set(heads.values()) - if self.advertise_refs or not self.http_req: + if self.advertise_refs or not self.stateless_rpc: for i, (ref, sha) in enumerate(sorted(heads.items())): try: peeled_sha = self.get_peeled(ref) except KeyError: # Skip refs that are inaccessible # TODO(jelmer): Integrate with Repo.fetch_objects refs # logic. continue line = sha + b' ' + ref if not i: line += (b'\x00' + self.handler.capability_line( self.handler.capabilities() + symref_capabilities(symrefs.items()))) self.proto.write_pkt_line(line + b'\n') if peeled_sha != sha: self.proto.write_pkt_line( peeled_sha + b' ' + ref + ANNOTATED_TAG_SUFFIX + b'\n') # i'm done.. self.proto.write_pkt_line(None) if self.advertise_refs: return [] # Now client will sending want want want commands want = self.proto.read_pkt_line() if not want: return [] line, caps = extract_want_line_capabilities(want) self.handler.set_client_capabilities(caps) self.set_ack_type(ack_type(caps)) allowed = (COMMAND_WANT, COMMAND_SHALLOW, COMMAND_DEEPEN, None) command, sha = _split_proto_line(line, allowed) want_revs = [] while command == COMMAND_WANT: if sha not in values: raise GitProtocolError( 'Client wants invalid object %s' % sha) want_revs.append(sha) command, sha = self.read_proto_line(allowed) self.set_wants(want_revs) if command in (COMMAND_SHALLOW, COMMAND_DEEPEN): self.unread_proto_line(command, sha) self._handle_shallow_request(want_revs) - if self.http_req and self.proto.eof(): + if self.stateless_rpc and self.proto.eof(): # The client may close the socket at this point, expecting a # flush-pkt from the server. We might be ready to send a packfile # at this point, so we need to explicitly short-circuit in this # case. return [] return want_revs def unread_proto_line(self, command, value): if isinstance(value, int): value = str(value).encode('ascii') self.proto.unread_pkt_line(command + b' ' + value) def ack(self, have_ref): if len(have_ref) != 40: raise ValueError("invalid sha %r" % have_ref) return self._impl.ack(have_ref) def reset(self): self._cached = True self._cache_index = 0 def next(self): if not self._cached: - if not self._impl and self.http_req: + if not self._impl and self.stateless_rpc: return None return next(self._impl) self._cache_index += 1 if self._cache_index > len(self._cache): return None return self._cache[self._cache_index] __next__ = next def read_proto_line(self, allowed): """Read a line from the wire. Args: allowed: An iterable of command names that should be allowed. Returns: A tuple of (command, value); see _split_proto_line. Raises: UnexpectedCommandError: If an error occurred reading the line. """ return _split_proto_line(self.proto.read_pkt_line(), allowed) def _handle_shallow_request(self, wants): while True: command, val = self.read_proto_line( (COMMAND_DEEPEN, COMMAND_SHALLOW)) if command == COMMAND_DEEPEN: depth = val break self.client_shallow.add(val) self.read_proto_line((None,)) # consume client's flush-pkt shallow, not_shallow = _find_shallow(self.store, wants, depth) # Update self.shallow instead of reassigning it since we passed a # reference to it before this method was called. self.shallow.update(shallow - not_shallow) new_shallow = self.shallow - self.client_shallow unshallow = self.unshallow = not_shallow & self.client_shallow for sha in sorted(new_shallow): self.proto.write_pkt_line(COMMAND_SHALLOW + b' ' + sha) for sha in sorted(unshallow): self.proto.write_pkt_line(COMMAND_UNSHALLOW + b' ' + sha) self.proto.write_pkt_line(None) def notify_done(self): # relay the message down to the handler. self.handler.notify_done() def send_ack(self, sha, ack_type=b''): if ack_type: ack_type = b' ' + ack_type self.proto.write_pkt_line(b'ACK ' + sha + ack_type + b'\n') def send_nak(self): self.proto.write_pkt_line(b'NAK\n') def handle_done(self, done_required, done_received): # Delegate this to the implementation. return self._impl.handle_done(done_required, done_received) def set_wants(self, wants): self._wants = wants def all_wants_satisfied(self, haves): """Check whether all the current wants are satisfied by a set of haves. Args: haves: A set of commits we know the client has. Note: Wants are specified with set_wants rather than passed in since in the current interface they are determined outside this class. """ return _all_wants_satisfied(self.store, haves, self._wants) def set_ack_type(self, ack_type): impl_classes = { MULTI_ACK: MultiAckGraphWalkerImpl, MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl, SINGLE_ACK: SingleAckGraphWalkerImpl, } self._impl = impl_classes[ack_type](self) _GRAPH_WALKER_COMMANDS = (COMMAND_HAVE, COMMAND_DONE, None) class SingleAckGraphWalkerImpl(object): """Graph walker implementation that speaks the single-ack protocol.""" def __init__(self, walker): self.walker = walker self._common = [] def ack(self, have_ref): if not self._common: self.walker.send_ack(have_ref) self._common.append(have_ref) def next(self): command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS) if command in (None, COMMAND_DONE): # defer the handling of done self.walker.notify_done() return None elif command == COMMAND_HAVE: return sha __next__ = next def handle_done(self, done_required, done_received): if not self._common: self.walker.send_nak() if done_required and not done_received: # we are not done, especially when done is required; skip # the pack for this request and especially do not handle # the done. return False if not done_received and not self._common: # Okay we are not actually done then since the walker picked # up no haves. This is usually triggered when client attempts # to pull from a source that has no common base_commit. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\ # test_multi_ack_stateless_nodone return False return True class MultiAckGraphWalkerImpl(object): """Graph walker implementation that speaks the multi-ack protocol.""" def __init__(self, walker): self.walker = walker self._found_base = False self._common = [] def ack(self, have_ref): self._common.append(have_ref) if not self._found_base: self.walker.send_ack(have_ref, b'continue') if self.walker.all_wants_satisfied(self._common): self._found_base = True # else we blind ack within next def next(self): while True: command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS) if command is None: self.walker.send_nak() # in multi-ack mode, a flush-pkt indicates the client wants to # flush but more have lines are still coming continue elif command == COMMAND_DONE: self.walker.notify_done() return None elif command == COMMAND_HAVE: if self._found_base: # blind ack self.walker.send_ack(sha, b'continue') return sha __next__ = next def handle_done(self, done_required, done_received): if done_required and not done_received: # we are not done, especially when done is required; skip # the pack for this request and especially do not handle # the done. return False if not done_received and not self._common: # Okay we are not actually done then since the walker picked # up no haves. This is usually triggered when client attempts # to pull from a source that has no common base_commit. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\ # test_multi_ack_stateless_nodone return False # don't nak unless no common commits were found, even if not # everything is satisfied if self._common: self.walker.send_ack(self._common[-1]) else: self.walker.send_nak() return True class MultiAckDetailedGraphWalkerImpl(object): """Graph walker implementation speaking the multi-ack-detailed protocol.""" def __init__(self, walker): self.walker = walker self._common = [] def ack(self, have_ref): # Should only be called iff have_ref is common self._common.append(have_ref) self.walker.send_ack(have_ref, b'common') def next(self): while True: command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS) if command is None: if self.walker.all_wants_satisfied(self._common): self.walker.send_ack(self._common[-1], b'ready') self.walker.send_nak() - if self.walker.http_req: + if self.walker.stateless_rpc: # The HTTP version of this request a flush-pkt always # signifies an end of request, so we also return # nothing here as if we are done (but not really, as # it depends on whether no-done capability was # specified and that's handled in handle_done which # may or may not call post_nodone_check depending on # that). return None elif command == COMMAND_DONE: # Let the walker know that we got a done. self.walker.notify_done() break elif command == COMMAND_HAVE: # return the sha and let the caller ACK it with the # above ack method. return sha # don't nak unless no common commits were found, even if not # everything is satisfied __next__ = next def handle_done(self, done_required, done_received): if done_required and not done_received: # we are not done, especially when done is required; skip # the pack for this request and especially do not handle # the done. return False if not done_received and not self._common: # Okay we are not actually done then since the walker picked # up no haves. This is usually triggered when client attempts # to pull from a source that has no common base_commit. # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\ # test_multi_ack_stateless_nodone return False # don't nak unless no common commits were found, even if not # everything is satisfied if self._common: self.walker.send_ack(self._common[-1]) else: self.walker.send_nak() return True class ReceivePackHandler(PackHandler): """Protocol handler for downloading a pack from the client.""" - def __init__(self, backend, args, proto, http_req=None, + def __init__(self, backend, args, proto, stateless_rpc=None, advertise_refs=False): super(ReceivePackHandler, self).__init__( - backend, proto, http_req=http_req) + backend, proto, stateless_rpc=stateless_rpc) self.repo = backend.open_repository(args[0]) self.advertise_refs = advertise_refs @classmethod def capabilities(cls) -> Iterable[bytes]: return [CAPABILITY_REPORT_STATUS, CAPABILITY_DELETE_REFS, CAPABILITY_QUIET, CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K, CAPABILITY_NO_DONE] def _apply_pack( self, refs: List[Tuple[bytes, bytes, bytes]] ) -> List[Tuple[bytes, bytes]]: all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError, AssertionError, socket.error, zlib.error, ObjectFormatException) status = [] will_send_pack = False for command in refs: if command[1] != ZERO_SHA: will_send_pack = True if will_send_pack: # TODO: more informative error messages than just the exception # string try: recv = getattr(self.proto, "recv", None) self.repo.object_store.add_thin_pack(self.proto.read, recv) status.append((b'unpack', b'ok')) except all_exceptions as e: status.append( (b'unpack', str(e).replace('\n', '').encode('utf-8'))) # The pack may still have been moved in, but it may contain # broken objects. We trust a later GC to clean it up. else: # The git protocol want to find a status entry related to unpack # process even if no pack data has been sent. status.append((b'unpack', b'ok')) for oldsha, sha, ref in refs: ref_status = b'ok' try: if sha == ZERO_SHA: if CAPABILITY_DELETE_REFS not in self.capabilities(): raise GitProtocolError( 'Attempted to delete refs without delete-refs ' 'capability.') try: self.repo.refs.remove_if_equals(ref, oldsha) except all_exceptions: ref_status = b'failed to delete' else: try: self.repo.refs.set_if_equals(ref, oldsha, sha) except all_exceptions: ref_status = b'failed to write' except KeyError: ref_status = b'bad ref' status.append((ref, ref_status)) return status def _report_status(self, status: List[Tuple[bytes, bytes]]) -> None: if self.has_capability(CAPABILITY_SIDE_BAND_64K): writer = BufferedPktLineWriter( lambda d: self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, d)) write = writer.write def flush(): writer.flush() self.proto.write_pkt_line(None) else: write = self.proto.write_pkt_line def flush(): pass for name, msg in status: if name == b'unpack': write(b'unpack ' + msg + b'\n') elif msg == b'ok': write(b'ok ' + name + b'\n') else: write(b'ng ' + name + b' ' + msg + b'\n') write(None) flush() def _on_post_receive(self, client_refs): hook = self.repo.hooks.get('post-receive', None) if not hook: return try: output = hook.execute(client_refs) if output: self.proto.write_sideband(SIDE_BAND_CHANNEL_PROGRESS, output) except HookError as err: self.proto.write_sideband(SIDE_BAND_CHANNEL_FATAL, repr(err)) def handle(self) -> None: - if self.advertise_refs or not self.http_req: + if self.advertise_refs or not self.stateless_rpc: refs = sorted(self.repo.get_refs().items()) symrefs = sorted(self.repo.refs.get_symrefs().items()) if not refs: refs = [(CAPABILITIES_REF, ZERO_SHA)] self.proto.write_pkt_line( refs[0][1] + b' ' + refs[0][0] + b'\0' + self.capability_line( self.capabilities() + symref_capabilities(symrefs)) + b'\n') for i in range(1, len(refs)): ref = refs[i] self.proto.write_pkt_line(ref[1] + b' ' + ref[0] + b'\n') self.proto.write_pkt_line(None) if self.advertise_refs: return client_refs = [] ref = self.proto.read_pkt_line() # if ref is none then client doesnt want to send us anything.. if ref is None: return ref, caps = extract_capabilities(ref) self.set_client_capabilities(caps) # client will now send us a list of (oldsha, newsha, ref) while ref: client_refs.append(ref.split()) ref = self.proto.read_pkt_line() # backend can now deal with this refs and read a pack using self.read status = self._apply_pack(client_refs) self._on_post_receive(client_refs) # when we have read all the pack from the client, send a status report # if the client asked for it if self.has_capability(CAPABILITY_REPORT_STATUS): self._report_status(status) class UploadArchiveHandler(Handler): - def __init__(self, backend, args, proto, http_req=None): - super(UploadArchiveHandler, self).__init__(backend, proto, http_req) + def __init__(self, backend, args, proto, stateless_rpc=None): + super(UploadArchiveHandler, self).__init__( + backend, proto, stateless_rpc) self.repo = backend.open_repository(args[0]) def handle(self): def write(x): return self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x) arguments = [] for pkt in self.proto.read_pkt_seq(): (key, value) = pkt.split(b' ', 1) if key != b'argument': raise GitProtocolError('unknown command %s' % key) arguments.append(value.rstrip(b'\n')) prefix = b'' format = 'tar' i = 0 store = self.repo.object_store while i < len(arguments): argument = arguments[i] if argument == b'--prefix': i += 1 prefix = arguments[i] elif argument == b'--format': i += 1 format = arguments[i].decode('ascii') else: commit_sha = self.repo.refs[argument] tree = store[store[commit_sha].tree] i += 1 self.proto.write_pkt_line(b'ACK\n') self.proto.write_pkt_line(None) for chunk in tar_stream( store, tree, mtime=time.time(), prefix=prefix, format=format): write(chunk) self.proto.write_pkt_line(None) # Default handler classes for git services. DEFAULT_HANDLERS = { b'git-upload-pack': UploadPackHandler, b'git-receive-pack': ReceivePackHandler, b'git-upload-archive': UploadArchiveHandler, } class TCPGitRequestHandler(socketserver.StreamRequestHandler): def __init__(self, handlers, *args, **kwargs): self.handlers = handlers socketserver.StreamRequestHandler.__init__(self, *args, **kwargs) def handle(self): proto = ReceivableProtocol(self.connection.recv, self.wfile.write) command, args = proto.read_cmd() logger.info('Handling %s request, args=%s', command, args) cls = self.handlers.get(command, None) if not callable(cls): raise GitProtocolError('Invalid service %s' % command) h = cls(self.server.backend, args, proto) h.handle() class TCPGitServer(socketserver.TCPServer): allow_reuse_address = True serve = socketserver.TCPServer.serve_forever def _make_handler(self, *args, **kwargs): return TCPGitRequestHandler(self.handlers, *args, **kwargs) def __init__(self, backend, listen_addr, port=TCP_GIT_PORT, handlers=None): self.handlers = dict(DEFAULT_HANDLERS) if handlers is not None: self.handlers.update(handlers) self.backend = backend logger.info('Listening for TCP connections on %s:%d', listen_addr, port) socketserver.TCPServer.__init__(self, (listen_addr, port), self._make_handler) def verify_request(self, request, client_address): logger.info('Handling request from %s', client_address) return True def handle_error(self, request, client_address): logger.exception('Exception happened during processing of request ' 'from %s', client_address) def main(argv=sys.argv): """Entry point for starting a TCP git server.""" import optparse parser = optparse.OptionParser() parser.add_option("-l", "--listen_address", dest="listen_address", default="localhost", help="Binding IP address.") parser.add_option("-p", "--port", dest="port", type=int, default=TCP_GIT_PORT, help="Binding TCP port.") options, args = parser.parse_args(argv) log_utils.default_logging_config() if len(args) > 1: gitdir = args[1] else: gitdir = '.' # TODO(jelmer): Support git-daemon-export-ok and --export-all. backend = FileSystemBackend(gitdir) server = TCPGitServer(backend, options.listen_address, options.port) server.serve_forever() def serve_command(handler_cls, argv=sys.argv, backend=None, inf=sys.stdin, outf=sys.stdout): """Serve a single command. This is mostly useful for the implementation of commands used by e.g. git+ssh. Args: handler_cls: `Handler` class to use for the request argv: execv-style command-line arguments. Defaults to sys.argv. backend: `Backend` to use inf: File-like object to read from, defaults to standard input. outf: File-like object to write to, defaults to standard output. Returns: Exit code for use with sys.exit. 0 on success, 1 on failure. """ if backend is None: backend = FileSystemBackend() def send_fn(data): outf.write(data) outf.flush() proto = Protocol(inf.read, send_fn) handler = handler_cls(backend, argv[1:], proto) # FIXME: Catch exceptions and write a single-line summary to outf. handler.handle() return 0 def generate_info_refs(repo): """Generate an info refs file.""" refs = repo.get_refs() return write_info_refs(refs, repo.object_store) def generate_objects_info_packs(repo): """Generate an index for for packs.""" for pack in repo.object_store.packs: yield ( b'P ' + os.fsencode(pack.data.filename) + b'\n') def update_server_info(repo): """Generate server info for dumb file access. This generates info/refs and objects/info/packs, similar to "git update-server-info". """ repo._put_named_file( os.path.join('info', 'refs'), b"".join(generate_info_refs(repo))) repo._put_named_file( os.path.join('objects', 'info', 'packs'), b"".join(generate_objects_info_packs(repo))) if __name__ == '__main__': main() diff --git a/dulwich/tests/test_server.py b/dulwich/tests/test_server.py index c726d694..5bdcc284 100644 --- a/dulwich/tests/test_server.py +++ b/dulwich/tests/test_server.py @@ -1,1112 +1,1112 @@ # test_server.py -- Tests for the git server # Copyright (C) 2010 Google, Inc. # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Tests for the smart protocol server.""" from io import BytesIO import os import shutil import tempfile import sys from dulwich.errors import ( GitProtocolError, NotGitRepository, UnexpectedCommandError, HangupException, ) from dulwich.object_store import ( MemoryObjectStore, ) from dulwich.repo import ( MemoryRepo, Repo, ) from dulwich.server import ( Backend, DictBackend, FileSystemBackend, MultiAckGraphWalkerImpl, MultiAckDetailedGraphWalkerImpl, PackHandler, _split_proto_line, serve_command, _find_shallow, _ProtocolGraphWalker, ReceivePackHandler, SingleAckGraphWalkerImpl, UploadPackHandler, update_server_info, ) from dulwich.tests import TestCase from dulwich.tests.utils import ( make_commit, make_tag, ) from dulwich.protocol import ( ZERO_SHA, ) ONE = b'1' * 40 TWO = b'2' * 40 THREE = b'3' * 40 FOUR = b'4' * 40 FIVE = b'5' * 40 SIX = b'6' * 40 class TestProto(object): def __init__(self): self._output = [] self._received = {0: [], 1: [], 2: [], 3: []} def set_output(self, output_lines): self._output = output_lines def read_pkt_line(self): if self._output: data = self._output.pop(0) if data is not None: return data.rstrip() + b'\n' else: # flush-pkt ('0000'). return None else: raise HangupException() def write_sideband(self, band, data): self._received[band].append(data) def write_pkt_line(self, data): self._received[0].append(data) def get_received_line(self, band=0): lines = self._received[band] return lines.pop(0) class TestGenericPackHandler(PackHandler): def __init__(self): PackHandler.__init__(self, Backend(), None) @classmethod def capabilities(cls): return [b'cap1', b'cap2', b'cap3'] @classmethod def required_capabilities(cls): return [b'cap2'] class HandlerTestCase(TestCase): def setUp(self): super(HandlerTestCase, self).setUp() self._handler = TestGenericPackHandler() def assertSucceeds(self, func, *args, **kwargs): try: func(*args, **kwargs) except GitProtocolError as e: self.fail(e) def test_capability_line(self): self.assertEqual( b' cap1 cap2 cap3', self._handler.capability_line([b'cap1', b'cap2', b'cap3'])) def test_set_client_capabilities(self): set_caps = self._handler.set_client_capabilities self.assertSucceeds(set_caps, [b'cap2']) self.assertSucceeds(set_caps, [b'cap1', b'cap2']) # different order self.assertSucceeds(set_caps, [b'cap3', b'cap1', b'cap2']) # error cases self.assertRaises(GitProtocolError, set_caps, [b'capxxx', b'cap2']) self.assertRaises(GitProtocolError, set_caps, [b'cap1', b'cap3']) # ignore innocuous but unknown capabilities self.assertRaises(GitProtocolError, set_caps, [b'cap2', b'ignoreme']) self.assertFalse(b'ignoreme' in self._handler.capabilities()) self._handler.innocuous_capabilities = lambda: (b'ignoreme',) self.assertSucceeds(set_caps, [b'cap2', b'ignoreme']) def test_has_capability(self): self.assertRaises(GitProtocolError, self._handler.has_capability, b'cap') caps = self._handler.capabilities() self._handler.set_client_capabilities(caps) for cap in caps: self.assertTrue(self._handler.has_capability(cap)) self.assertFalse(self._handler.has_capability(b'capxxx')) class UploadPackHandlerTestCase(TestCase): def setUp(self): super(UploadPackHandlerTestCase, self).setUp() self._repo = MemoryRepo.init_bare([], {}) backend = DictBackend({b'/': self._repo}) self._handler = UploadPackHandler( backend, [b'/', b'host=lolcathost'], TestProto()) def test_progress(self): caps = self._handler.required_capabilities() self._handler.set_client_capabilities(caps) self._handler.progress(b'first message') self._handler.progress(b'second message') self.assertEqual(b'first message', self._handler.proto.get_received_line(2)) self.assertEqual(b'second message', self._handler.proto.get_received_line(2)) self.assertRaises(IndexError, self._handler.proto.get_received_line, 2) def test_no_progress(self): caps = list(self._handler.required_capabilities()) + [b'no-progress'] self._handler.set_client_capabilities(caps) self._handler.progress(b'first message') self._handler.progress(b'second message') self.assertRaises(IndexError, self._handler.proto.get_received_line, 2) def test_get_tagged(self): refs = { b'refs/tags/tag1': ONE, b'refs/tags/tag2': TWO, b'refs/heads/master': FOUR, # not a tag, no peeled value } # repo needs to peel this object self._repo.object_store.add_object(make_commit(id=FOUR)) self._repo.refs._update(refs) peeled = { b'refs/tags/tag1': b'1234' * 10, b'refs/tags/tag2': b'5678' * 10, } self._repo.refs._update_peeled(peeled) caps = list(self._handler.required_capabilities()) + [b'include-tag'] self._handler.set_client_capabilities(caps) self.assertEqual({b'1234' * 10: ONE, b'5678' * 10: TWO}, self._handler.get_tagged(refs, repo=self._repo)) # non-include-tag case caps = self._handler.required_capabilities() self._handler.set_client_capabilities(caps) self.assertEqual({}, self._handler.get_tagged(refs, repo=self._repo)) class FindShallowTests(TestCase): def setUp(self): super(FindShallowTests, self).setUp() self._store = MemoryObjectStore() def make_commit(self, **attrs): commit = make_commit(**attrs) self._store.add_object(commit) return commit def make_linear_commits(self, n, message=b''): commits = [] parents = [] for _ in range(n): commits.append(self.make_commit(parents=parents, message=message)) parents = [commits[-1].id] return commits def assertSameElements(self, expected, actual): self.assertEqual(set(expected), set(actual)) def test_linear(self): c1, c2, c3 = self.make_linear_commits(3) self.assertEqual((set([c3.id]), set([])), _find_shallow(self._store, [c3.id], 1)) self.assertEqual((set([c2.id]), set([c3.id])), _find_shallow(self._store, [c3.id], 2)) self.assertEqual((set([c1.id]), set([c2.id, c3.id])), _find_shallow(self._store, [c3.id], 3)) self.assertEqual((set([]), set([c1.id, c2.id, c3.id])), _find_shallow(self._store, [c3.id], 4)) def test_multiple_independent(self): a = self.make_linear_commits(2, message=b'a') b = self.make_linear_commits(2, message=b'b') c = self.make_linear_commits(2, message=b'c') heads = [a[1].id, b[1].id, c[1].id] self.assertEqual((set([a[0].id, b[0].id, c[0].id]), set(heads)), _find_shallow(self._store, heads, 2)) def test_multiple_overlapping(self): # Create the following commit tree: # 1--2 # \ # 3--4 c1, c2 = self.make_linear_commits(2) c3 = self.make_commit(parents=[c1.id]) c4 = self.make_commit(parents=[c3.id]) # 1 is shallow along the path from 4, but not along the path from 2. self.assertEqual((set([c1.id]), set([c1.id, c2.id, c3.id, c4.id])), _find_shallow(self._store, [c2.id, c4.id], 3)) def test_merge(self): c1 = self.make_commit() c2 = self.make_commit() c3 = self.make_commit(parents=[c1.id, c2.id]) self.assertEqual((set([c1.id, c2.id]), set([c3.id])), _find_shallow(self._store, [c3.id], 2)) def test_tag(self): c1, c2 = self.make_linear_commits(2) tag = make_tag(c2, name=b'tag') self._store.add_object(tag) self.assertEqual((set([c1.id]), set([c2.id])), _find_shallow(self._store, [tag.id], 2)) class TestUploadPackHandler(UploadPackHandler): @classmethod def required_capabilities(self): return [] class ReceivePackHandlerTestCase(TestCase): def setUp(self): super(ReceivePackHandlerTestCase, self).setUp() self._repo = MemoryRepo.init_bare([], {}) backend = DictBackend({b'/': self._repo}) self._handler = ReceivePackHandler( backend, [b'/', b'host=lolcathost'], TestProto()) def test_apply_pack_del_ref(self): refs = { b'refs/heads/master': TWO, b'refs/heads/fake-branch': ONE} self._repo.refs._update(refs) update_refs = [[ONE, ZERO_SHA, b'refs/heads/fake-branch'], ] self._handler.set_client_capabilities([b'delete-refs']) status = self._handler._apply_pack(update_refs) self.assertEqual(status[0][0], b'unpack') self.assertEqual(status[0][1], b'ok') self.assertEqual(status[1][0], b'refs/heads/fake-branch') self.assertEqual(status[1][1], b'ok') class ProtocolGraphWalkerEmptyTestCase(TestCase): def setUp(self): super(ProtocolGraphWalkerEmptyTestCase, self).setUp() self._repo = MemoryRepo.init_bare([], {}) backend = DictBackend({b'/': self._repo}) self._walker = _ProtocolGraphWalker( TestUploadPackHandler(backend, [b'/', b'host=lolcats'], TestProto()), self._repo.object_store, self._repo.get_peeled, self._repo.refs.get_symrefs) def test_empty_repository(self): # The server should wait for a flush packet. self._walker.proto.set_output([]) self.assertRaises(HangupException, self._walker.determine_wants, {}) self.assertEqual(None, self._walker.proto.get_received_line()) self._walker.proto.set_output([None]) self.assertEqual([], self._walker.determine_wants({})) self.assertEqual(None, self._walker.proto.get_received_line()) class ProtocolGraphWalkerTestCase(TestCase): def setUp(self): super(ProtocolGraphWalkerTestCase, self).setUp() # Create the following commit tree: # 3---5 # / # 1---2---4 commits = [ make_commit(id=ONE, parents=[], commit_time=111), make_commit(id=TWO, parents=[ONE], commit_time=222), make_commit(id=THREE, parents=[ONE], commit_time=333), make_commit(id=FOUR, parents=[TWO], commit_time=444), make_commit(id=FIVE, parents=[THREE], commit_time=555), ] self._repo = MemoryRepo.init_bare(commits, {}) backend = DictBackend({b'/': self._repo}) self._walker = _ProtocolGraphWalker( TestUploadPackHandler(backend, [b'/', b'host=lolcats'], TestProto()), self._repo.object_store, self._repo.get_peeled, self._repo.refs.get_symrefs) def test_all_wants_satisfied_no_haves(self): self._walker.set_wants([ONE]) self.assertFalse(self._walker.all_wants_satisfied([])) self._walker.set_wants([TWO]) self.assertFalse(self._walker.all_wants_satisfied([])) self._walker.set_wants([THREE]) self.assertFalse(self._walker.all_wants_satisfied([])) def test_all_wants_satisfied_have_root(self): self._walker.set_wants([ONE]) self.assertTrue(self._walker.all_wants_satisfied([ONE])) self._walker.set_wants([TWO]) self.assertTrue(self._walker.all_wants_satisfied([ONE])) self._walker.set_wants([THREE]) self.assertTrue(self._walker.all_wants_satisfied([ONE])) def test_all_wants_satisfied_have_branch(self): self._walker.set_wants([TWO]) self.assertTrue(self._walker.all_wants_satisfied([TWO])) # wrong branch self._walker.set_wants([THREE]) self.assertFalse(self._walker.all_wants_satisfied([TWO])) def test_all_wants_satisfied(self): self._walker.set_wants([FOUR, FIVE]) # trivial case: wants == haves self.assertTrue(self._walker.all_wants_satisfied([FOUR, FIVE])) # cases that require walking the commit tree self.assertTrue(self._walker.all_wants_satisfied([ONE])) self.assertFalse(self._walker.all_wants_satisfied([TWO])) self.assertFalse(self._walker.all_wants_satisfied([THREE])) self.assertTrue(self._walker.all_wants_satisfied([TWO, THREE])) def test_split_proto_line(self): allowed = (b'want', b'done', None) self.assertEqual((b'want', ONE), _split_proto_line(b'want ' + ONE + b'\n', allowed)) self.assertEqual((b'want', TWO), _split_proto_line(b'want ' + TWO + b'\n', allowed)) self.assertRaises(GitProtocolError, _split_proto_line, b'want xxxx\n', allowed) self.assertRaises(UnexpectedCommandError, _split_proto_line, b'have ' + THREE + b'\n', allowed) self.assertRaises(GitProtocolError, _split_proto_line, b'foo ' + FOUR + b'\n', allowed) self.assertRaises(GitProtocolError, _split_proto_line, b'bar', allowed) self.assertEqual((b'done', None), _split_proto_line(b'done\n', allowed)) self.assertEqual((None, None), _split_proto_line(b'', allowed)) def test_determine_wants(self): self._walker.proto.set_output([None]) self.assertEqual([], self._walker.determine_wants({})) self.assertEqual(None, self._walker.proto.get_received_line()) self._walker.proto.set_output([ b'want ' + ONE + b' multi_ack', b'want ' + TWO, None, ]) heads = { b'refs/heads/ref1': ONE, b'refs/heads/ref2': TWO, b'refs/heads/ref3': THREE, } self._repo.refs._update(heads) self.assertEqual([ONE, TWO], self._walker.determine_wants(heads)) self._walker.advertise_refs = True self.assertEqual([], self._walker.determine_wants(heads)) self._walker.advertise_refs = False self._walker.proto.set_output([b'want ' + FOUR + b' multi_ack', None]) self.assertRaises(GitProtocolError, self._walker.determine_wants, heads) self._walker.proto.set_output([None]) self.assertEqual([], self._walker.determine_wants(heads)) self._walker.proto.set_output( [b'want ' + ONE + b' multi_ack', b'foo', None]) self.assertRaises(GitProtocolError, self._walker.determine_wants, heads) self._walker.proto.set_output([b'want ' + FOUR + b' multi_ack', None]) self.assertRaises(GitProtocolError, self._walker.determine_wants, heads) def test_determine_wants_advertisement(self): self._walker.proto.set_output([None]) # advertise branch tips plus tag heads = { b'refs/heads/ref4': FOUR, b'refs/heads/ref5': FIVE, b'refs/heads/tag6': SIX, } self._repo.refs._update(heads) self._repo.refs._update_peeled(heads) self._repo.refs._update_peeled({b'refs/heads/tag6': FIVE}) self._walker.determine_wants(heads) lines = [] while True: line = self._walker.proto.get_received_line() if line is None: break # strip capabilities list if present if b'\x00' in line: line = line[:line.index(b'\x00')] lines.append(line.rstrip()) self.assertEqual([ FOUR + b' refs/heads/ref4', FIVE + b' refs/heads/ref5', FIVE + b' refs/heads/tag6^{}', SIX + b' refs/heads/tag6', ], sorted(lines)) # ensure peeled tag was advertised immediately following tag for i, line in enumerate(lines): if line.endswith(b' refs/heads/tag6'): self.assertEqual(FIVE + b' refs/heads/tag6^{}', lines[i+1]) # TODO: test commit time cutoff def _handle_shallow_request(self, lines, heads): self._walker.proto.set_output(lines + [None]) self._walker._handle_shallow_request(heads) def assertReceived(self, expected): self.assertEqual( expected, list(iter(self._walker.proto.get_received_line, None))) def test_handle_shallow_request_no_client_shallows(self): self._handle_shallow_request([b'deepen 2\n'], [FOUR, FIVE]) self.assertEqual(set([TWO, THREE]), self._walker.shallow) self.assertReceived([ b'shallow ' + TWO, b'shallow ' + THREE, ]) def test_handle_shallow_request_no_new_shallows(self): lines = [ b'shallow ' + TWO + b'\n', b'shallow ' + THREE + b'\n', b'deepen 2\n', ] self._handle_shallow_request(lines, [FOUR, FIVE]) self.assertEqual(set([TWO, THREE]), self._walker.shallow) self.assertReceived([]) def test_handle_shallow_request_unshallows(self): lines = [ b'shallow ' + TWO + b'\n', b'deepen 3\n', ] self._handle_shallow_request(lines, [FOUR, FIVE]) self.assertEqual(set([ONE]), self._walker.shallow) self.assertReceived([ b'shallow ' + ONE, b'unshallow ' + TWO, # THREE is unshallow but was is not shallow in the client ]) class TestProtocolGraphWalker(object): def __init__(self): self.acks = [] self.lines = [] self.wants_satisified = False - self.http_req = None + self.stateless_rpc = None self.advertise_refs = False self._impl = None self.done_required = True self.done_received = False self._empty = False self.pack_sent = False def read_proto_line(self, allowed): command, sha = self.lines.pop(0) if allowed is not None: assert command in allowed return command, sha def send_ack(self, sha, ack_type=b''): self.acks.append((sha, ack_type)) def send_nak(self): self.acks.append((None, b'nak')) def all_wants_satisfied(self, haves): if haves: return self.wants_satisified def pop_ack(self): if not self.acks: return None return self.acks.pop(0) def handle_done(self): if not self._impl: return # Whether or not PACK is sent after is determined by this, so # record this value. self.pack_sent = self._impl.handle_done( self.done_required, self.done_received) return self.pack_sent def notify_done(self): self.done_received = True class AckGraphWalkerImplTestCase(TestCase): """Base setup and asserts for AckGraphWalker tests.""" def setUp(self): super(AckGraphWalkerImplTestCase, self).setUp() self._walker = TestProtocolGraphWalker() self._walker.lines = [ (b'have', TWO), (b'have', ONE), (b'have', THREE), (b'done', None), ] self._impl = self.impl_cls(self._walker) self._walker._impl = self._impl def assertNoAck(self): self.assertEqual(None, self._walker.pop_ack()) def assertAcks(self, acks): for sha, ack_type in acks: self.assertEqual((sha, ack_type), self._walker.pop_ack()) self.assertNoAck() def assertAck(self, sha, ack_type=b''): self.assertAcks([(sha, ack_type)]) def assertNak(self): self.assertAck(None, b'nak') def assertNextEquals(self, sha): self.assertEqual(sha, next(self._impl)) def assertNextEmpty(self): # This is necessary because of no-done - the assumption that it # it safe to immediately send out the final ACK is no longer # true but the test is still needed for it. TestProtocolWalker # does implement the handle_done which will determine whether # the final confirmation can be sent. self.assertRaises(IndexError, next, self._impl) self._walker.handle_done() class SingleAckGraphWalkerImplTestCase(AckGraphWalkerImplTestCase): impl_cls = SingleAckGraphWalkerImpl def test_single_ack(self): self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE) self.assertNextEquals(THREE) self._impl.ack(THREE) self.assertNoAck() self.assertNextEquals(None) self.assertNoAck() def test_single_ack_flush(self): # same as ack test but ends with a flush-pkt instead of done self._walker.lines[-1] = (None, None) self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE) self.assertNextEquals(THREE) self.assertNoAck() self.assertNextEquals(None) self.assertNoAck() def test_single_ack_nak(self): self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNoAck() self.assertNextEquals(THREE) self.assertNoAck() self.assertNextEquals(None) self.assertNextEmpty() self.assertNak() def test_single_ack_nak_flush(self): # same as nak test but ends with a flush-pkt instead of done self._walker.lines[-1] = (None, None) self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNoAck() self.assertNextEquals(THREE) self.assertNoAck() self.assertNextEquals(None) self.assertNextEmpty() self.assertNak() class MultiAckGraphWalkerImplTestCase(AckGraphWalkerImplTestCase): impl_cls = MultiAckGraphWalkerImpl def test_multi_ack(self): self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE, b'continue') self.assertNextEquals(THREE) self._impl.ack(THREE) self.assertAck(THREE, b'continue') self.assertNextEquals(None) self.assertNextEmpty() self.assertAck(THREE) def test_multi_ack_partial(self): self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE, b'continue') self.assertNextEquals(THREE) self.assertNoAck() self.assertNextEquals(None) self.assertNextEmpty() self.assertAck(ONE) def test_multi_ack_flush(self): self._walker.lines = [ (b'have', TWO), (None, None), (b'have', ONE), (b'have', THREE), (b'done', None), ] self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNak() # nak the flush-pkt self._impl.ack(ONE) self.assertAck(ONE, b'continue') self.assertNextEquals(THREE) self._impl.ack(THREE) self.assertAck(THREE, b'continue') self.assertNextEquals(None) self.assertNextEmpty() self.assertAck(THREE) def test_multi_ack_nak(self): self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNoAck() self.assertNextEquals(THREE) self.assertNoAck() self.assertNextEquals(None) self.assertNextEmpty() self.assertNak() class MultiAckDetailedGraphWalkerImplTestCase(AckGraphWalkerImplTestCase): impl_cls = MultiAckDetailedGraphWalkerImpl def test_multi_ack(self): self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE, b'common') self.assertNextEquals(THREE) self._impl.ack(THREE) self.assertAck(THREE, b'common') # done is read. self._walker.wants_satisified = True self.assertNextEquals(None) self._walker.lines.append((None, None)) self.assertNextEmpty() self.assertAcks([(THREE, b'ready'), (None, b'nak'), (THREE, b'')]) # PACK is sent self.assertTrue(self._walker.pack_sent) def test_multi_ack_nodone(self): self._walker.done_required = False self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE, b'common') self.assertNextEquals(THREE) self._impl.ack(THREE) self.assertAck(THREE, b'common') # done is read. self._walker.wants_satisified = True self.assertNextEquals(None) self._walker.lines.append((None, None)) self.assertNextEmpty() self.assertAcks([(THREE, b'ready'), (None, b'nak'), (THREE, b'')]) # PACK is sent self.assertTrue(self._walker.pack_sent) def test_multi_ack_flush_end(self): # transmission ends with a flush-pkt without a done but no-done is # assumed. self._walker.lines[-1] = (None, None) self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE, b'common') self.assertNextEquals(THREE) self._impl.ack(THREE) self.assertAck(THREE, b'common') # no done is read self._walker.wants_satisified = True self.assertNextEmpty() self.assertAcks([(THREE, b'ready'), (None, b'nak')]) # PACK is NOT sent self.assertFalse(self._walker.pack_sent) def test_multi_ack_flush_end_nodone(self): # transmission ends with a flush-pkt without a done but no-done is # assumed. self._walker.lines[-1] = (None, None) self._walker.done_required = False self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE, b'common') self.assertNextEquals(THREE) self._impl.ack(THREE) self.assertAck(THREE, b'common') # no done is read, but pretend it is (last 'ACK 'commit_id' '') self._walker.wants_satisified = True self.assertNextEmpty() self.assertAcks([(THREE, b'ready'), (None, b'nak'), (THREE, b'')]) # PACK is sent self.assertTrue(self._walker.pack_sent) def test_multi_ack_partial(self): self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self._impl.ack(ONE) self.assertAck(ONE, b'common') self.assertNextEquals(THREE) self.assertNoAck() self.assertNextEquals(None) self.assertNextEmpty() self.assertAck(ONE) def test_multi_ack_flush(self): # same as ack test but contains a flush-pkt in the middle self._walker.lines = [ (b'have', TWO), (None, None), (b'have', ONE), (b'have', THREE), (b'done', None), (None, None), ] self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNak() # nak the flush-pkt self._impl.ack(ONE) self.assertAck(ONE, b'common') self.assertNextEquals(THREE) self._impl.ack(THREE) self.assertAck(THREE, b'common') self._walker.wants_satisified = True self.assertNextEquals(None) self.assertNextEmpty() self.assertAcks([(THREE, b'ready'), (None, b'nak'), (THREE, b'')]) def test_multi_ack_nak(self): self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNoAck() self.assertNextEquals(THREE) self.assertNoAck() # Done is sent here. self.assertNextEquals(None) self.assertNextEmpty() self.assertNak() self.assertNextEmpty() self.assertTrue(self._walker.pack_sent) def test_multi_ack_nak_nodone(self): self._walker.done_required = False self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNoAck() self.assertNextEquals(THREE) self.assertNoAck() # Done is sent here. self.assertFalse(self._walker.pack_sent) self.assertNextEquals(None) self.assertNextEmpty() self.assertTrue(self._walker.pack_sent) self.assertNak() self.assertNextEmpty() def test_multi_ack_nak_flush(self): # same as nak test but contains a flush-pkt in the middle self._walker.lines = [ (b'have', TWO), (None, None), (b'have', ONE), (b'have', THREE), (b'done', None), ] self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNak() self.assertNextEquals(THREE) self.assertNoAck() self.assertNextEquals(None) self.assertNextEmpty() self.assertNak() def test_multi_ack_stateless(self): # transmission ends with a flush-pkt self._walker.lines[-1] = (None, None) - self._walker.http_req = True + self._walker.stateless_rpc = True self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNoAck() self.assertNextEquals(THREE) self.assertNoAck() self.assertFalse(self._walker.pack_sent) self.assertNextEquals(None) self.assertNak() self.assertNextEmpty() self.assertNoAck() self.assertFalse(self._walker.pack_sent) def test_multi_ack_stateless_nodone(self): self._walker.done_required = False # transmission ends with a flush-pkt self._walker.lines[-1] = (None, None) - self._walker.http_req = True + self._walker.stateless_rpc = True self.assertNextEquals(TWO) self.assertNoAck() self.assertNextEquals(ONE) self.assertNoAck() self.assertNextEquals(THREE) self.assertNoAck() self.assertFalse(self._walker.pack_sent) self.assertNextEquals(None) self.assertNak() self.assertNextEmpty() self.assertNoAck() # PACK will still not be sent. self.assertFalse(self._walker.pack_sent) class FileSystemBackendTests(TestCase): """Tests for FileSystemBackend.""" def setUp(self): super(FileSystemBackendTests, self).setUp() self.path = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, self.path) self.repo = Repo.init(self.path) if sys.platform == 'win32': self.backend = FileSystemBackend(self.path[0] + ':' + os.sep) else: self.backend = FileSystemBackend() def test_nonexistant(self): self.assertRaises(NotGitRepository, self.backend.open_repository, "/does/not/exist/unless/foo") def test_absolute(self): repo = self.backend.open_repository(self.path) self.assertTrue(os.path.samefile( os.path.abspath(repo.path), os.path.abspath(self.repo.path))) def test_child(self): self.assertRaises( NotGitRepository, self.backend.open_repository, os.path.join(self.path, "foo")) def test_bad_repo_path(self): backend = FileSystemBackend() self.assertRaises(NotGitRepository, lambda: backend.open_repository('/ups')) class DictBackendTests(TestCase): """Tests for DictBackend.""" def test_nonexistant(self): repo = MemoryRepo.init_bare([], {}) backend = DictBackend({b'/': repo}) self.assertRaises( NotGitRepository, backend.open_repository, "/does/not/exist/unless/foo") def test_bad_repo_path(self): repo = MemoryRepo.init_bare([], {}) backend = DictBackend({b'/': repo}) self.assertRaises(NotGitRepository, lambda: backend.open_repository('/ups')) class ServeCommandTests(TestCase): """Tests for serve_command.""" def setUp(self): super(ServeCommandTests, self).setUp() self.backend = DictBackend({}) def serve_command(self, handler_cls, args, inf, outf): return serve_command( handler_cls, [b"test"] + args, backend=self.backend, inf=inf, outf=outf) def test_receive_pack(self): commit = make_commit(id=ONE, parents=[], commit_time=111) self.backend.repos[b"/"] = MemoryRepo.init_bare( [commit], {b"refs/heads/master": commit.id}) outf = BytesIO() exitcode = self.serve_command(ReceivePackHandler, [b"/"], BytesIO(b"0000"), outf) outlines = outf.getvalue().splitlines() self.assertEqual(2, len(outlines)) self.assertEqual( b"1111111111111111111111111111111111111111 refs/heads/master", outlines[0][4:].split(b"\x00")[0]) self.assertEqual(b"0000", outlines[-1]) self.assertEqual(0, exitcode) class UpdateServerInfoTests(TestCase): """Tests for update_server_info.""" def setUp(self): super(UpdateServerInfoTests, self).setUp() self.path = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, self.path) self.repo = Repo.init(self.path) def test_empty(self): update_server_info(self.repo) with open(os.path.join(self.path, ".git", "info", "refs"), 'rb') as f: self.assertEqual(b'', f.read()) p = os.path.join(self.path, ".git", "objects", "info", "packs") with open(p, 'rb') as f: self.assertEqual(b'', f.read()) def test_simple(self): commit_id = self.repo.do_commit( message=b"foo", committer=b"Joe Example ", ref=b"refs/heads/foo") update_server_info(self.repo) with open(os.path.join(self.path, ".git", "info", "refs"), 'rb') as f: self.assertEqual(f.read(), commit_id + b'\trefs/heads/foo\n') p = os.path.join(self.path, ".git", "objects", "info", "packs") with open(p, 'rb') as f: self.assertEqual(f.read(), b'') diff --git a/dulwich/tests/test_web.py b/dulwich/tests/test_web.py index deff10cc..7d332439 100644 --- a/dulwich/tests/test_web.py +++ b/dulwich/tests/test_web.py @@ -1,573 +1,573 @@ # test_web.py -- Tests for the git HTTP server # Copyright (C) 2010 Google, Inc. # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Tests for the Git HTTP server.""" from io import BytesIO import gzip import re import os from typing import Type from dulwich.object_store import ( MemoryObjectStore, ) from dulwich.objects import ( Blob, ) from dulwich.repo import ( BaseRepo, MemoryRepo, ) from dulwich.server import ( DictBackend, ) from dulwich.tests import ( TestCase, ) from dulwich.web import ( HTTP_OK, HTTP_NOT_FOUND, HTTP_FORBIDDEN, HTTP_ERROR, GunzipFilter, send_file, get_text_file, get_loose_object, get_pack_file, get_idx_file, get_info_refs, get_info_packs, handle_service_request, _LengthLimitedFile, HTTPGitRequest, HTTPGitApplication, ) from dulwich.tests.utils import ( make_object, make_tag, ) class MinimalistWSGIInputStream(object): """WSGI input stream with no 'seek()' and 'tell()' methods.""" def __init__(self, data): self.data = data self.pos = 0 def read(self, howmuch): start = self.pos end = self.pos + howmuch if start >= len(self.data): return '' self.pos = end return self.data[start:end] class MinimalistWSGIInputStream2(MinimalistWSGIInputStream): """WSGI input stream with no *working* 'seek()' and 'tell()' methods.""" def seek(self, pos): raise NotImplementedError def tell(self): raise NotImplementedError class TestHTTPGitRequest(HTTPGitRequest): """HTTPGitRequest with overridden methods to help test caching.""" def __init__(self, *args, **kwargs): HTTPGitRequest.__init__(self, *args, **kwargs) self.cached = None def nocache(self): self.cached = False def cache_forever(self): self.cached = True class WebTestCase(TestCase): """Base TestCase with useful instance vars and utility functions.""" _req_class = TestHTTPGitRequest # type: Type[HTTPGitRequest] def setUp(self): super(WebTestCase, self).setUp() self._environ = {} self._req = self._req_class(self._environ, self._start_response, handlers=self._handlers()) self._status = None self._headers = [] self._output = BytesIO() def _start_response(self, status, headers): self._status = status self._headers = list(headers) return self._output.write def _handlers(self): return None def assertContentTypeEquals(self, expected): self.assertTrue(('Content-Type', expected) in self._headers) def _test_backend(objects, refs=None, named_files=None): if not refs: refs = {} if not named_files: named_files = {} repo = MemoryRepo.init_bare(objects, refs) for path, contents in named_files.items(): repo._put_named_file(path, contents) return DictBackend({'/': repo}) class DumbHandlersTestCase(WebTestCase): def test_send_file_not_found(self): list(send_file(self._req, None, 'text/plain')) self.assertEqual(HTTP_NOT_FOUND, self._status) def test_send_file(self): f = BytesIO(b'foobar') output = b''.join(send_file(self._req, f, 'some/thing')) self.assertEqual(b'foobar', output) self.assertEqual(HTTP_OK, self._status) self.assertContentTypeEquals('some/thing') self.assertTrue(f.closed) def test_send_file_buffered(self): bufsize = 10240 xs = b'x' * bufsize f = BytesIO(2 * xs) self.assertEqual([xs, xs], list(send_file(self._req, f, 'some/thing'))) self.assertEqual(HTTP_OK, self._status) self.assertContentTypeEquals('some/thing') self.assertTrue(f.closed) def test_send_file_error(self): class TestFile(object): def __init__(self, exc_class): self.closed = False self._exc_class = exc_class def read(self, size=-1): raise self._exc_class() def close(self): self.closed = True f = TestFile(IOError) list(send_file(self._req, f, 'some/thing')) self.assertEqual(HTTP_ERROR, self._status) self.assertTrue(f.closed) self.assertFalse(self._req.cached) # non-IOErrors are reraised f = TestFile(AttributeError) self.assertRaises(AttributeError, list, send_file(self._req, f, 'some/thing')) self.assertTrue(f.closed) self.assertFalse(self._req.cached) def test_get_text_file(self): backend = _test_backend([], named_files={'description': b'foo'}) mat = re.search('.*', 'description') output = b''.join(get_text_file(self._req, backend, mat)) self.assertEqual(b'foo', output) self.assertEqual(HTTP_OK, self._status) self.assertContentTypeEquals('text/plain') self.assertFalse(self._req.cached) def test_get_loose_object(self): blob = make_object(Blob, data=b'foo') backend = _test_backend([blob]) mat = re.search('^(..)(.{38})$', blob.id.decode('ascii')) output = b''.join(get_loose_object(self._req, backend, mat)) self.assertEqual(blob.as_legacy_object(), output) self.assertEqual(HTTP_OK, self._status) self.assertContentTypeEquals('application/x-git-loose-object') self.assertTrue(self._req.cached) def test_get_loose_object_missing(self): mat = re.search('^(..)(.{38})$', '1' * 40) list(get_loose_object(self._req, _test_backend([]), mat)) self.assertEqual(HTTP_NOT_FOUND, self._status) def test_get_loose_object_error(self): blob = make_object(Blob, data=b'foo') backend = _test_backend([blob]) mat = re.search('^(..)(.{38})$', blob.id.decode('ascii')) def as_legacy_object_error(self): raise IOError self.addCleanup( setattr, Blob, 'as_legacy_object', Blob.as_legacy_object) Blob.as_legacy_object = as_legacy_object_error list(get_loose_object(self._req, backend, mat)) self.assertEqual(HTTP_ERROR, self._status) def test_get_pack_file(self): pack_name = os.path.join( 'objects', 'pack', 'pack-%s.pack' % ('1' * 40)) backend = _test_backend([], named_files={pack_name: b'pack contents'}) mat = re.search('.*', pack_name) output = b''.join(get_pack_file(self._req, backend, mat)) self.assertEqual(b'pack contents', output) self.assertEqual(HTTP_OK, self._status) self.assertContentTypeEquals('application/x-git-packed-objects') self.assertTrue(self._req.cached) def test_get_idx_file(self): idx_name = os.path.join('objects', 'pack', 'pack-%s.idx' % ('1' * 40)) backend = _test_backend([], named_files={idx_name: b'idx contents'}) mat = re.search('.*', idx_name) output = b''.join(get_idx_file(self._req, backend, mat)) self.assertEqual(b'idx contents', output) self.assertEqual(HTTP_OK, self._status) self.assertContentTypeEquals('application/x-git-packed-objects-toc') self.assertTrue(self._req.cached) def test_get_info_refs(self): self._environ['QUERY_STRING'] = '' blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'2') blob3 = make_object(Blob, data=b'3') tag1 = make_tag(blob2, name=b'tag-tag') objects = [blob1, blob2, blob3, tag1] refs = { b'HEAD': b'000', b'refs/heads/master': blob1.id, b'refs/tags/tag-tag': tag1.id, b'refs/tags/blob-tag': blob3.id, } backend = _test_backend(objects, refs=refs) mat = re.search('.*', '//info/refs') self.assertEqual([blob1.id + b'\trefs/heads/master\n', blob3.id + b'\trefs/tags/blob-tag\n', tag1.id + b'\trefs/tags/tag-tag\n', blob2.id + b'\trefs/tags/tag-tag^{}\n'], list(get_info_refs(self._req, backend, mat))) self.assertEqual(HTTP_OK, self._status) self.assertContentTypeEquals('text/plain') self.assertFalse(self._req.cached) def test_get_info_refs_not_found(self): self._environ['QUERY_STRING'] = '' objects = [] refs = {} backend = _test_backend(objects, refs=refs) mat = re.search('info/refs', '/foo/info/refs') self.assertEqual( [b'No git repository was found at /foo'], list(get_info_refs(self._req, backend, mat))) self.assertEqual(HTTP_NOT_FOUND, self._status) self.assertContentTypeEquals('text/plain') def test_get_info_packs(self): class TestPackData(object): def __init__(self, sha): self.filename = "pack-%s.pack" % sha class TestPack(object): def __init__(self, sha): self.data = TestPackData(sha) packs = [TestPack(str(i) * 40) for i in range(1, 4)] class TestObjectStore(MemoryObjectStore): # property must be overridden, can't be assigned @property def packs(self): return packs store = TestObjectStore() repo = BaseRepo(store, None) backend = DictBackend({'/': repo}) mat = re.search('.*', '//info/packs') output = b''.join(get_info_packs(self._req, backend, mat)) expected = b''.join( [(b'P pack-' + s + b'.pack\n') for s in [b'1' * 40, b'2' * 40, b'3' * 40]]) self.assertEqual(expected, output) self.assertEqual(HTTP_OK, self._status) self.assertContentTypeEquals('text/plain') self.assertFalse(self._req.cached) class SmartHandlersTestCase(WebTestCase): class _TestUploadPackHandler(object): - def __init__(self, backend, args, proto, http_req=None, + def __init__(self, backend, args, proto, stateless_rpc=None, advertise_refs=False): self.args = args self.proto = proto - self.http_req = http_req + self.stateless_rpc = stateless_rpc self.advertise_refs = advertise_refs def handle(self): self.proto.write(b'handled input: ' + self.proto.recv(1024)) def _make_handler(self, *args, **kwargs): self._handler = self._TestUploadPackHandler(*args, **kwargs) return self._handler def _handlers(self): return {b'git-upload-pack': self._make_handler} def test_handle_service_request_unknown(self): mat = re.search('.*', '/git-evil-handler') content = list(handle_service_request(self._req, 'backend', mat)) self.assertEqual(HTTP_FORBIDDEN, self._status) self.assertFalse(b'git-evil-handler' in b"".join(content)) self.assertFalse(self._req.cached) def _run_handle_service_request(self, content_length=None): self._environ['wsgi.input'] = BytesIO(b'foo') if content_length is not None: self._environ['CONTENT_LENGTH'] = content_length mat = re.search('.*', '/git-upload-pack') class Backend(object): def open_repository(self, path): return None handler_output = b''.join( handle_service_request(self._req, Backend(), mat)) write_output = self._output.getvalue() # Ensure all output was written via the write callback. self.assertEqual(b'', handler_output) self.assertEqual(b'handled input: foo', write_output) self.assertContentTypeEquals('application/x-git-upload-pack-result') self.assertFalse(self._handler.advertise_refs) - self.assertTrue(self._handler.http_req) + self.assertTrue(self._handler.stateless_rpc) self.assertFalse(self._req.cached) def test_handle_service_request(self): self._run_handle_service_request() def test_handle_service_request_with_length(self): self._run_handle_service_request(content_length='3') def test_handle_service_request_empty_length(self): self._run_handle_service_request(content_length='') def test_get_info_refs_unknown(self): self._environ['QUERY_STRING'] = 'service=git-evil-handler' class Backend(object): def open_repository(self, url): return None mat = re.search('.*', '/git-evil-pack') content = list(get_info_refs(self._req, Backend(), mat)) self.assertFalse(b'git-evil-handler' in b"".join(content)) self.assertEqual(HTTP_FORBIDDEN, self._status) self.assertFalse(self._req.cached) def test_get_info_refs(self): self._environ['wsgi.input'] = BytesIO(b'foo') self._environ['QUERY_STRING'] = 'service=git-upload-pack' class Backend(object): def open_repository(self, url): return None mat = re.search('.*', '/git-upload-pack') handler_output = b''.join(get_info_refs(self._req, Backend(), mat)) write_output = self._output.getvalue() self.assertEqual((b'001e# service=git-upload-pack\n' b'0000' # input is ignored by the handler b'handled input: '), write_output) # Ensure all output was written via the write callback. self.assertEqual(b'', handler_output) self.assertTrue(self._handler.advertise_refs) - self.assertTrue(self._handler.http_req) + self.assertTrue(self._handler.stateless_rpc) self.assertFalse(self._req.cached) class LengthLimitedFileTestCase(TestCase): def test_no_cutoff(self): f = _LengthLimitedFile(BytesIO(b'foobar'), 1024) self.assertEqual(b'foobar', f.read()) def test_cutoff(self): f = _LengthLimitedFile(BytesIO(b'foobar'), 3) self.assertEqual(b'foo', f.read()) self.assertEqual(b'', f.read()) def test_multiple_reads(self): f = _LengthLimitedFile(BytesIO(b'foobar'), 3) self.assertEqual(b'fo', f.read(2)) self.assertEqual(b'o', f.read(2)) self.assertEqual(b'', f.read()) class HTTPGitRequestTestCase(WebTestCase): # This class tests the contents of the actual cache headers _req_class = HTTPGitRequest def test_not_found(self): self._req.cache_forever() # cache headers should be discarded message = 'Something not found' self.assertEqual(message.encode('ascii'), self._req.not_found(message)) self.assertEqual(HTTP_NOT_FOUND, self._status) self.assertEqual(set([('Content-Type', 'text/plain')]), set(self._headers)) def test_forbidden(self): self._req.cache_forever() # cache headers should be discarded message = 'Something not found' self.assertEqual(message.encode('ascii'), self._req.forbidden(message)) self.assertEqual(HTTP_FORBIDDEN, self._status) self.assertEqual(set([('Content-Type', 'text/plain')]), set(self._headers)) def test_respond_ok(self): self._req.respond() self.assertEqual([], self._headers) self.assertEqual(HTTP_OK, self._status) def test_respond(self): self._req.nocache() self._req.respond(status=402, content_type='some/type', headers=[('X-Foo', 'foo'), ('X-Bar', 'bar')]) self.assertEqual(set([ ('X-Foo', 'foo'), ('X-Bar', 'bar'), ('Content-Type', 'some/type'), ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'), ('Pragma', 'no-cache'), ('Cache-Control', 'no-cache, max-age=0, must-revalidate'), ]), set(self._headers)) self.assertEqual(402, self._status) class HTTPGitApplicationTestCase(TestCase): def setUp(self): super(HTTPGitApplicationTestCase, self).setUp() self._app = HTTPGitApplication('backend') self._environ = { 'PATH_INFO': '/foo', 'REQUEST_METHOD': 'GET', } def _test_handler(self, req, backend, mat): # tests interface used by all handlers self.assertEqual(self._environ, req.environ) self.assertEqual('backend', backend) self.assertEqual('/foo', mat.group(0)) return 'output' def _add_handler(self, app): req = self._environ['REQUEST_METHOD'] app.services = { (req, re.compile('/foo$')): self._test_handler, } def test_call(self): self._add_handler(self._app) self.assertEqual('output', self._app(self._environ, None)) def test_fallback_app(self): def test_app(environ, start_response): return 'output' app = HTTPGitApplication('backend', fallback_app=test_app) self.assertEqual('output', app(self._environ, None)) class GunzipTestCase(HTTPGitApplicationTestCase): __doc__ = """TestCase for testing the GunzipFilter, ensuring the wsgi.input is correctly decompressed and headers are corrected. """ example_text = __doc__.encode('ascii') def setUp(self): super(GunzipTestCase, self).setUp() self._app = GunzipFilter(self._app) self._environ['HTTP_CONTENT_ENCODING'] = 'gzip' self._environ['REQUEST_METHOD'] = 'POST' def _get_zstream(self, text): zstream = BytesIO() zfile = gzip.GzipFile(fileobj=zstream, mode='w') zfile.write(text) zfile.close() zlength = zstream.tell() zstream.seek(0) return zstream, zlength def _test_call(self, orig, zstream, zlength): self._add_handler(self._app.app) self.assertLess(zlength, len(orig)) self.assertEqual(self._environ['HTTP_CONTENT_ENCODING'], 'gzip') self._environ['CONTENT_LENGTH'] = zlength self._environ['wsgi.input'] = zstream self._app(self._environ, None) buf = self._environ['wsgi.input'] self.assertIsNot(buf, zstream) buf.seek(0) self.assertEqual(orig, buf.read()) self.assertIs(None, self._environ.get('CONTENT_LENGTH')) self.assertNotIn('HTTP_CONTENT_ENCODING', self._environ) def test_call(self): self._test_call( self.example_text, *self._get_zstream(self.example_text) ) def test_call_no_seek(self): """ This ensures that the gunzipping code doesn't require any methods on 'wsgi.input' except for '.read()'. (In particular, it shouldn't require '.seek()'. See https://github.com/jelmer/dulwich/issues/140.) """ zstream, zlength = self._get_zstream(self.example_text) self._test_call( self.example_text, MinimalistWSGIInputStream(zstream.read()), zlength) def test_call_no_working_seek(self): """ Similar to 'test_call_no_seek', but this time the methods are available (but defunct). See https://github.com/jonashaag/klaus/issues/154. """ zstream, zlength = self._get_zstream(self.example_text) self._test_call( self.example_text, MinimalistWSGIInputStream2(zstream.read()), zlength) diff --git a/dulwich/web.py b/dulwich/web.py index 89816cef..d4d6aa6b 100644 --- a/dulwich/web.py +++ b/dulwich/web.py @@ -1,526 +1,526 @@ # web.py -- WSGI smart-http server # Copyright (C) 2010 Google, Inc. # Copyright (C) 2012 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """HTTP server for dulwich that implements the git smart HTTP protocol.""" from io import BytesIO import shutil import tempfile import gzip import os import re import sys import time from typing import List, Tuple, Optional from wsgiref.simple_server import ( WSGIRequestHandler, ServerHandler, WSGIServer, make_server, ) from urllib.parse import parse_qs from dulwich import log_utils from dulwich.protocol import ( ReceivableProtocol, ) from dulwich.repo import ( BaseRepo, NotGitRepository, Repo, ) from dulwich.server import ( DictBackend, DEFAULT_HANDLERS, generate_info_refs, generate_objects_info_packs, ) logger = log_utils.getLogger(__name__) # HTTP error strings HTTP_OK = '200 OK' HTTP_NOT_FOUND = '404 Not Found' HTTP_FORBIDDEN = '403 Forbidden' HTTP_ERROR = '500 Internal Server Error' def date_time_string(timestamp: Optional[float] = None) -> str: # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the # Python 2.6.5 standard library, following modifications: # - Made a global rather than an instance method. # - weekdayname and monthname are renamed and locals rather than class # variables. # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] if timestamp is None: timestamp = time.time() year, month, day, hh, mm, ss, wd = time.gmtime(timestamp)[:7] return '%s, %02d %3s %4d %02d:%02d:%02d GMD' % ( weekdays[wd], day, months[month], year, hh, mm, ss) def url_prefix(mat) -> str: """Extract the URL prefix from a regex match. Args: mat: A regex match object. Returns: The URL prefix, defined as the text before the match in the original string. Normalized to start with one leading slash and end with zero. """ return '/' + mat.string[:mat.start()].strip('/') def get_repo(backend, mat) -> BaseRepo: """Get a Repo instance for the given backend and URL regex match.""" return backend.open_repository(url_prefix(mat)) def send_file(req, f, content_type): """Send a file-like object to the request output. Args: req: The HTTPGitRequest object to send output to. f: An open file-like object to send; will be closed. content_type: The MIME type for the file. Returns: Iterator over the contents of the file, as chunks. """ if f is None: yield req.not_found('File not found') return try: req.respond(HTTP_OK, content_type) while True: data = f.read(10240) if not data: break yield data except IOError: yield req.error('Error reading file') finally: f.close() def _url_to_path(url): return url.replace('/', os.path.sep) def get_text_file(req, backend, mat): req.nocache() path = _url_to_path(mat.group()) logger.info('Sending plain text file %s', path) return send_file(req, get_repo(backend, mat).get_named_file(path), 'text/plain') def get_loose_object(req, backend, mat): sha = (mat.group(1) + mat.group(2)).encode('ascii') logger.info('Sending loose object %s', sha) object_store = get_repo(backend, mat).object_store if not object_store.contains_loose(sha): yield req.not_found('Object not found') return try: data = object_store[sha].as_legacy_object() except IOError: yield req.error('Error reading object') return req.cache_forever() req.respond(HTTP_OK, 'application/x-git-loose-object') yield data def get_pack_file(req, backend, mat): req.cache_forever() path = _url_to_path(mat.group()) logger.info('Sending pack file %s', path) return send_file(req, get_repo(backend, mat).get_named_file(path), 'application/x-git-packed-objects') def get_idx_file(req, backend, mat): req.cache_forever() path = _url_to_path(mat.group()) logger.info('Sending pack file %s', path) return send_file(req, get_repo(backend, mat).get_named_file(path), 'application/x-git-packed-objects-toc') def get_info_refs(req, backend, mat): params = parse_qs(req.environ['QUERY_STRING']) service = params.get('service', [None])[0] try: repo = get_repo(backend, mat) except NotGitRepository as e: yield req.not_found(str(e)) return if service and not req.dumb: handler_cls = req.handlers.get(service.encode('ascii'), None) if handler_cls is None: yield req.forbidden('Unsupported service') return req.nocache() write = req.respond( HTTP_OK, 'application/x-%s-advertisement' % service) proto = ReceivableProtocol(BytesIO().read, write) handler = handler_cls(backend, [url_prefix(mat)], proto, - http_req=req, advertise_refs=True) + stateless_rpc=req, advertise_refs=True) handler.proto.write_pkt_line( b'# service=' + service.encode('ascii') + b'\n') handler.proto.write_pkt_line(None) handler.handle() else: # non-smart fallback # TODO: select_getanyfile() (see http-backend.c) req.nocache() req.respond(HTTP_OK, 'text/plain') logger.info('Emulating dumb info/refs') for text in generate_info_refs(repo): yield text def get_info_packs(req, backend, mat): req.nocache() req.respond(HTTP_OK, 'text/plain') logger.info('Emulating dumb info/packs') return generate_objects_info_packs(get_repo(backend, mat)) class _LengthLimitedFile(object): """Wrapper class to limit the length of reads from a file-like object. This is used to ensure EOF is read from the wsgi.input object once Content-Length bytes are read. This behavior is required by the WSGI spec but not implemented in wsgiref as of 2.5. """ def __init__(self, input, max_bytes): self._input = input self._bytes_avail = max_bytes def read(self, size=-1): if self._bytes_avail <= 0: return b'' if size == -1 or size > self._bytes_avail: size = self._bytes_avail self._bytes_avail -= size return self._input.read(size) # TODO: support more methods as necessary def handle_service_request(req, backend, mat): service = mat.group().lstrip('/') logger.info('Handling service request for %s', service) handler_cls = req.handlers.get(service.encode('ascii'), None) if handler_cls is None: yield req.forbidden('Unsupported service') return try: get_repo(backend, mat) except NotGitRepository as e: yield req.not_found(str(e)) return req.nocache() write = req.respond(HTTP_OK, 'application/x-%s-result' % service) proto = ReceivableProtocol(req.environ['wsgi.input'].read, write) # TODO(jelmer): Find a way to pass in repo, rather than having handler_cls # reopen. - handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req) + handler = handler_cls(backend, [url_prefix(mat)], proto, stateless_rpc=req) handler.handle() class HTTPGitRequest(object): """Class encapsulating the state of a single git HTTP request. :ivar environ: the WSGI environment for the request. """ def __init__( self, environ, start_response, dumb: bool = False, handlers=None): self.environ = environ self.dumb = dumb self.handlers = handlers self._start_response = start_response self._cache_headers = [] # type: List[Tuple[str, str]] self._headers = [] # type: List[Tuple[str, str]] def add_header(self, name, value): """Add a header to the response.""" self._headers.append((name, value)) def respond( self, status: str = HTTP_OK, content_type: Optional[str] = None, headers: Optional[List[Tuple[str, str]]] = None): """Begin a response with the given status and other headers.""" if headers: self._headers.extend(headers) if content_type: self._headers.append(('Content-Type', content_type)) self._headers.extend(self._cache_headers) return self._start_response(status, self._headers) def not_found(self, message: str) -> bytes: """Begin a HTTP 404 response and return the text of a message.""" self._cache_headers = [] logger.info('Not found: %s', message) self.respond(HTTP_NOT_FOUND, 'text/plain') return message.encode('ascii') def forbidden(self, message: str) -> bytes: """Begin a HTTP 403 response and return the text of a message.""" self._cache_headers = [] logger.info('Forbidden: %s', message) self.respond(HTTP_FORBIDDEN, 'text/plain') return message.encode('ascii') def error(self, message: str) -> bytes: """Begin a HTTP 500 response and return the text of a message.""" self._cache_headers = [] logger.error('Error: %s', message) self.respond(HTTP_ERROR, 'text/plain') return message.encode('ascii') def nocache(self) -> None: """Set the response to never be cached by the client.""" self._cache_headers = [ ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'), ('Pragma', 'no-cache'), ('Cache-Control', 'no-cache, max-age=0, must-revalidate'), ] def cache_forever(self) -> None: """Set the response to be cached forever by the client.""" now = time.time() self._cache_headers = [ ('Date', date_time_string(now)), ('Expires', date_time_string(now + 31536000)), ('Cache-Control', 'public, max-age=31536000'), ] class HTTPGitApplication(object): """Class encapsulating the state of a git WSGI application. :ivar backend: the Backend object backing this application """ services = { ('GET', re.compile('/HEAD$')): get_text_file, ('GET', re.compile('/info/refs$')): get_info_refs, ('GET', re.compile('/objects/info/alternates$')): get_text_file, ('GET', re.compile('/objects/info/http-alternates$')): get_text_file, ('GET', re.compile('/objects/info/packs$')): get_info_packs, ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object, ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file, ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file, ('POST', re.compile('/git-upload-pack$')): handle_service_request, ('POST', re.compile('/git-receive-pack$')): handle_service_request, } def __init__( self, backend, dumb: bool = False, handlers=None, fallback_app=None): self.backend = backend self.dumb = dumb self.handlers = dict(DEFAULT_HANDLERS) self.fallback_app = fallback_app if handlers is not None: self.handlers.update(handlers) def __call__(self, environ, start_response): path = environ['PATH_INFO'] method = environ['REQUEST_METHOD'] req = HTTPGitRequest(environ, start_response, dumb=self.dumb, handlers=self.handlers) # environ['QUERY_STRING'] has qs args handler = None for smethod, spath in self.services.keys(): if smethod != method: continue mat = spath.search(path) if mat: handler = self.services[smethod, spath] break if handler is None: if self.fallback_app is not None: return self.fallback_app(environ, start_response) else: return [req.not_found('Sorry, that method is not supported')] return handler(req, self.backend, mat) class GunzipFilter(object): """WSGI middleware that unzips gzip-encoded requests before passing on to the underlying application. """ def __init__(self, application): self.app = application def __call__(self, environ, start_response): if environ.get('HTTP_CONTENT_ENCODING', '') == 'gzip': try: environ['wsgi.input'].tell() wsgi_input = environ['wsgi.input'] except (AttributeError, IOError, NotImplementedError): # The gzip implementation in the standard library of Python 2.x # requires working '.seek()' and '.tell()' methods on the input # stream. Read the data into a temporary file to work around # this limitation. wsgi_input = tempfile.SpooledTemporaryFile(16 * 1024 * 1024) shutil.copyfileobj(environ['wsgi.input'], wsgi_input) wsgi_input.seek(0) environ['wsgi.input'] = gzip.GzipFile( filename=None, fileobj=wsgi_input, mode='r') del environ['HTTP_CONTENT_ENCODING'] if 'CONTENT_LENGTH' in environ: del environ['CONTENT_LENGTH'] return self.app(environ, start_response) class LimitedInputFilter(object): """WSGI middleware that limits the input length of a request to that specified in Content-Length. """ def __init__(self, application): self.app = application def __call__(self, environ, start_response): # This is not necessary if this app is run from a conforming WSGI # server. Unfortunately, there's no way to tell that at this point. # TODO: git may used HTTP/1.1 chunked encoding instead of specifying # content-length content_length = environ.get('CONTENT_LENGTH', '') if content_length: environ['wsgi.input'] = _LengthLimitedFile( environ['wsgi.input'], int(content_length)) return self.app(environ, start_response) def make_wsgi_chain(*args, **kwargs): """Factory function to create an instance of HTTPGitApplication, correctly wrapped with needed middleware. """ app = HTTPGitApplication(*args, **kwargs) wrapped_app = LimitedInputFilter(GunzipFilter(app)) return wrapped_app class ServerHandlerLogger(ServerHandler): """ServerHandler that uses dulwich's logger for logging exceptions.""" def log_exception(self, exc_info): logger.exception('Exception happened during processing of request', exc_info=exc_info) def log_message(self, format, *args): logger.info(format, *args) def log_error(self, *args): logger.error(*args) class WSGIRequestHandlerLogger(WSGIRequestHandler): """WSGIRequestHandler that uses dulwich's logger for logging exceptions.""" def log_exception(self, exc_info): logger.exception('Exception happened during processing of request', exc_info=exc_info) def log_message(self, format, *args): logger.info(format, *args) def log_error(self, *args): logger.error(*args) def handle(self): """Handle a single HTTP request""" self.raw_requestline = self.rfile.readline() if not self.parse_request(): # An error code has been sent, just exit return handler = ServerHandlerLogger( self.rfile, self.wfile, self.get_stderr(), self.get_environ() ) handler.request_handler = self # backpointer for logging handler.run(self.server.get_app()) class WSGIServerLogger(WSGIServer): def handle_error(self, request, client_address): """Handle an error. """ logger.exception( 'Exception happened during processing of request from %s' % str(client_address)) def main(argv=sys.argv): """Entry point for starting an HTTP git server.""" import optparse parser = optparse.OptionParser() parser.add_option("-l", "--listen_address", dest="listen_address", default="localhost", help="Binding IP address.") parser.add_option("-p", "--port", dest="port", type=int, default=8000, help="Port to listen on.") options, args = parser.parse_args(argv) if len(args) > 1: gitdir = args[1] else: gitdir = os.getcwd() log_utils.default_logging_config() backend = DictBackend({'/': Repo(gitdir)}) app = make_wsgi_chain(backend) server = make_server(options.listen_address, options.port, app, handler_class=WSGIRequestHandlerLogger, server_class=WSGIServerLogger) logger.info('Listening for HTTP connections on %s:%d', options.listen_address, options.port) server.serve_forever() if __name__ == '__main__': main()