# test_web.py -- Tests for the git HTTP server
# Copyright (C) 2010 Google, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# or (at your option) any later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.

"""Tests for the Git HTTP server."""

from cStringIO import StringIO
import gzip
import re
import os

from dulwich.object_store import (
    MemoryObjectStore,
    )
from dulwich.objects import (
    Blob,
    Tag,
    )
from dulwich.repo import (
    BaseRepo,
    MemoryRepo,
    )
from dulwich.server import (
    DictBackend,
    )
from dulwich.tests import (
    TestCase,
    )
from dulwich.web import (
    HTTP_OK,
    HTTP_NOT_FOUND,
    HTTP_FORBIDDEN,
    HTTP_ERROR,
    GunzipFilter,
    send_file,
    get_text_file,
    get_loose_object,
    get_pack_file,
    get_idx_file,
    get_info_refs,
    get_info_packs,
    handle_service_request,
    _LengthLimitedFile,
    HTTPGitRequest,
    HTTPGitApplication,
    )

from dulwich.tests.utils import (
    make_object,
    )


class TestHTTPGitRequest(HTTPGitRequest):
    """HTTPGitRequest with overridden methods to help test caching.

    Instead of building cache headers, the overrides record the last caching
    decision in ``self.cached``: None (no decision yet), False (nocache) or
    True (cache_forever).
    """

    def __init__(self, *args, **kwargs):
        HTTPGitRequest.__init__(self, *args, **kwargs)
        self.cached = None

    def nocache(self):
        self.cached = False

    def cache_forever(self):
        self.cached = True


class WebTestCase(TestCase):
    """Base TestCase with useful instance vars and utility functions."""

    # Request class instantiated in setUp; subclasses may override it.
    _req_class = TestHTTPGitRequest

    def setUp(self):
        super(WebTestCase, self).setUp()
        self._environ = {}
        self._req = self._req_class(self._environ, self._start_response,
                                    handlers=self._handlers())
        self._status = None
        self._headers = []
        self._output = StringIO()

    def _start_response(self, status, headers):
        """Fake WSGI start_response that records the status and headers.

        :return: The write callable; everything written lands in
            self._output.
        """
        self._status = status
        self._headers = list(headers)
        return self._output.write

    def _handlers(self):
        """Return the handlers mapping passed to the request (None here)."""
        return None

    def assertContentTypeEquals(self, expected):
        # assertIn reports the actual header list when the check fails.
        self.assertIn(('Content-Type', expected), self._headers)


def _test_backend(objects, refs=None, named_files=None):
    """Build a DictBackend serving one in-memory bare repo at '/'.

    :param objects: Objects to initialise the repository with.
    :param refs: Optional dict of refs; defaults to no refs.
    :param named_files: Optional dict mapping path to contents, stored in
        the repository as named files.
    """
    if not refs:
        refs = {}
    if not named_files:
        named_files = {}
    repo = MemoryRepo.init_bare(objects, refs)
    for path, contents in named_files.iteritems():
        repo._put_named_file(path, contents)
    return DictBackend({'/': repo})


class DumbHandlersTestCase(WebTestCase):

    def test_send_file_not_found(self):
        list(send_file(self._req, None, 'text/plain'))
        self.assertEqual(HTTP_NOT_FOUND, self._status)

    def test_send_file(self):
        f = StringIO('foobar')
        output = ''.join(send_file(self._req, f, 'some/thing'))
        self.assertEqual('foobar', output)
        self.assertEqual(HTTP_OK, self._status)
        self.assertContentTypeEquals('some/thing')
        self.assertTrue(f.closed)

    def test_send_file_buffered(self):
        bufsize = 10240
        xs = 'x' * bufsize
        f = StringIO(2 * xs)
        self.assertEqual([xs, xs],
                         list(send_file(self._req, f, 'some/thing')))
        self.assertEqual(HTTP_OK, self._status)
        self.assertContentTypeEquals('some/thing')
        self.assertTrue(f.closed)

    def test_send_file_error(self):
        class TestFile(object):
            """File-like object whose read() always raises exc_class."""

            def __init__(self, exc_class):
                self.closed = False
                self._exc_class = exc_class

            def read(self, size=-1):
                raise self._exc_class()

            def close(self):
                self.closed = True

        f = TestFile(IOError)
        list(send_file(self._req, f, 'some/thing'))
        self.assertEqual(HTTP_ERROR, self._status)
        self.assertTrue(f.closed)
        self.assertFalse(self._req.cached)

        # non-IOErrors are reraised
        f = TestFile(AttributeError)
        self.assertRaises(AttributeError, list,
                          send_file(self._req, f, 'some/thing'))
        self.assertTrue(f.closed)
        self.assertFalse(self._req.cached)

    def test_get_text_file(self):
        backend = _test_backend([], named_files={'description': 'foo'})
        mat = re.search('.*', 'description')
        output = ''.join(get_text_file(self._req, backend, mat))
        self.assertEqual('foo', output)
        self.assertEqual(HTTP_OK, self._status)
        self.assertContentTypeEquals('text/plain')
        self.assertFalse(self._req.cached)

    def test_get_loose_object(self):
        blob = make_object(Blob, data='foo')
        backend = _test_backend([blob])
        mat = re.search('^(..)(.{38})$', blob.id)
        output = ''.join(get_loose_object(self._req, backend, mat))
        self.assertEqual(blob.as_legacy_object(), output)
        self.assertEqual(HTTP_OK, self._status)
        self.assertContentTypeEquals('application/x-git-loose-object')
        self.assertTrue(self._req.cached)

    def test_get_loose_object_missing(self):
        mat = re.search('^(..)(.{38})$', '1' * 40)
        list(get_loose_object(self._req, _test_backend([]), mat))
        self.assertEqual(HTTP_NOT_FOUND, self._status)

    def test_get_loose_object_error(self):
        blob = make_object(Blob, data='foo')
        backend = _test_backend([blob])
        mat = re.search('^(..)(.{38})$', blob.id)

        def as_legacy_object_error():
            raise IOError

        # Make serialising the stored blob fail so the handler has to
        # report an error.
        blob.as_legacy_object = as_legacy_object_error
        list(get_loose_object(self._req, backend, mat))
        self.assertEqual(HTTP_ERROR, self._status)

    def test_get_pack_file(self):
        pack_name = os.path.join('objects', 'pack',
                                 'pack-%s.pack' % ('1' * 40))
        backend = _test_backend([], named_files={pack_name: 'pack contents'})
        mat = re.search('.*', pack_name)
        output = ''.join(get_pack_file(self._req, backend, mat))
        self.assertEqual('pack contents', output)
        self.assertEqual(HTTP_OK, self._status)
        self.assertContentTypeEquals('application/x-git-packed-objects')
        self.assertTrue(self._req.cached)

    def test_get_idx_file(self):
        idx_name = os.path.join('objects', 'pack', 'pack-%s.idx' % ('1' * 40))
        backend = _test_backend([], named_files={idx_name: 'idx contents'})
        mat = re.search('.*', idx_name)
        output = ''.join(get_idx_file(self._req, backend, mat))
        self.assertEqual('idx contents', output)
        self.assertEqual(HTTP_OK, self._status)
        self.assertContentTypeEquals('application/x-git-packed-objects-toc')
        self.assertTrue(self._req.cached)

    def test_get_info_refs(self):
        self._environ['QUERY_STRING'] = ''

        blob1 = make_object(Blob, data='1')
        blob2 = make_object(Blob, data='2')
        blob3 = make_object(Blob, data='3')

        tag1 = make_object(Tag, name='tag-tag',
                           tagger='Test ',
                           tag_time=12345,
                           tag_timezone=0,
                           message='message',
                           object=(Blob, blob2.id))

        objects = [blob1, blob2, blob3, tag1]
        refs = {
            'HEAD': '000',
            'refs/heads/master': blob1.id,
            'refs/tags/tag-tag': tag1.id,
            'refs/tags/blob-tag': blob3.id,
            }
        backend = _test_backend(objects, refs=refs)

        mat = re.search('.*', '//info/refs')
        self.assertEqual(['%s\trefs/heads/master\n' % blob1.id,
                          '%s\trefs/tags/blob-tag\n' % blob3.id,
                          '%s\trefs/tags/tag-tag\n' % tag1.id,
                          '%s\trefs/tags/tag-tag^{}\n' % blob2.id],
                         list(get_info_refs(self._req, backend, mat)))
        self.assertEqual(HTTP_OK, self._status)
        self.assertContentTypeEquals('text/plain')
        self.assertFalse(self._req.cached)

    def test_get_info_packs(self):
        class TestPackData(object):

            def __init__(self, sha):
                self.filename = "pack-%s.pack" % sha

        class TestPack(object):

            def __init__(self, sha):
                self.data = TestPackData(sha)

        packs = [TestPack(str(i) * 40) for i in xrange(1, 4)]

        class TestObjectStore(MemoryObjectStore):
            # property must be overridden, can't be assigned
            @property
            def packs(self):
                return packs

        store = TestObjectStore()
        repo = BaseRepo(store, None)
        backend = DictBackend({'/': repo})
        mat = re.search('.*', '//info/packs')
        output = ''.join(get_info_packs(self._req, backend, mat))
        expected = 'P pack-%s.pack\n' * 3
        expected %= ('1' * 40, '2' * 40, '3' * 40)
        self.assertEqual(expected, output)
        self.assertEqual(HTTP_OK, self._status)
        self.assertContentTypeEquals('text/plain')
        self.assertFalse(self._req.cached)


class SmartHandlersTestCase(WebTestCase):

    class _TestUploadPackHandler(object):
        """Stand-in service handler that echoes back what it received."""

        def __init__(self, backend, args, proto, http_req=None,
                     advertise_refs=False):
            self.args = args
            self.proto = proto
            self.http_req = http_req
            self.advertise_refs = advertise_refs

        def handle(self):
            self.proto.write('handled input: %s' % self.proto.recv(1024))

    def _make_handler(self, *args, **kwargs):
        # Keep a reference so tests can inspect how the handler was built.
        self._handler = self._TestUploadPackHandler(*args, **kwargs)
        return self._handler

    def _handlers(self):
        return {'git-upload-pack': self._make_handler}

    def test_handle_service_request_unknown(self):
        mat = re.search('.*', '/git-evil-handler')
        list(handle_service_request(self._req, 'backend', mat))
        self.assertEqual(HTTP_FORBIDDEN, self._status)
        self.assertFalse(self._req.cached)

    def _run_handle_service_request(self, content_length=None):
        """Run a git-upload-pack request with input 'foo' and check output.

        :param content_length: If not None, stored as CONTENT_LENGTH in the
            environ before the request is handled.
        """
        self._environ['wsgi.input'] = StringIO('foo')
        if content_length is not None:
            self._environ['CONTENT_LENGTH'] = content_length
        mat = re.search('.*', '/git-upload-pack')
        handler_output = ''.join(
            handle_service_request(self._req, 'backend', mat))
        write_output = self._output.getvalue()
        # Ensure all output was written via the write callback.
        self.assertEqual('', handler_output)
        self.assertEqual('handled input: foo', write_output)
        self.assertContentTypeEquals('application/x-git-upload-pack-result')
        self.assertFalse(self._handler.advertise_refs)
        self.assertTrue(self._handler.http_req)
        self.assertFalse(self._req.cached)

    def test_handle_service_request(self):
        self._run_handle_service_request()

    def test_handle_service_request_with_length(self):
        self._run_handle_service_request(content_length='3')

    def test_handle_service_request_empty_length(self):
        self._run_handle_service_request(content_length='')

    def test_get_info_refs_unknown(self):
        self._environ['QUERY_STRING'] = 'service=git-evil-handler'
        list(get_info_refs(self._req, 'backend', None))
        self.assertEqual(HTTP_FORBIDDEN, self._status)
        self.assertFalse(self._req.cached)

    def test_get_info_refs(self):
        self._environ['wsgi.input'] = StringIO('foo')
        self._environ['QUERY_STRING'] = 'service=git-upload-pack'

        mat = re.search('.*', '/git-upload-pack')
        handler_output = ''.join(get_info_refs(self._req, 'backend', mat))
        write_output = self._output.getvalue()
        self.assertEqual(('001e# service=git-upload-pack\n'
                          '0000'
                          # input is ignored by the handler
                          'handled input: '), write_output)
        # Ensure all output was written via the write callback.
        self.assertEqual('', handler_output)
        self.assertTrue(self._handler.advertise_refs)
        self.assertTrue(self._handler.http_req)
        self.assertFalse(self._req.cached)


class LengthLimitedFileTestCase(TestCase):

    def test_no_cutoff(self):
        f = _LengthLimitedFile(StringIO('foobar'), 1024)
        self.assertEqual('foobar', f.read())

    def test_cutoff(self):
        f = _LengthLimitedFile(StringIO('foobar'), 3)
        self.assertEqual('foo', f.read())
        self.assertEqual('', f.read())

    def test_multiple_reads(self):
        f = _LengthLimitedFile(StringIO('foobar'), 3)
        self.assertEqual('fo', f.read(2))
        self.assertEqual('o', f.read(2))
        self.assertEqual('', f.read())


class HTTPGitRequestTestCase(WebTestCase):

    # This class tests the contents of the actual cache headers
    _req_class = HTTPGitRequest

    def test_not_found(self):
        self._req.cache_forever()  # cache headers should be discarded
        message = 'Something not found'
        self.assertEqual(message, self._req.not_found(message))
        self.assertEqual(HTTP_NOT_FOUND, self._status)
        self.assertEqual(set([('Content-Type', 'text/plain')]),
                         set(self._headers))

    def test_forbidden(self):
        self._req.cache_forever()  # cache headers should be discarded
        message = 'Something not found'
        self.assertEqual(message, self._req.forbidden(message))
        self.assertEqual(HTTP_FORBIDDEN, self._status)
        self.assertEqual(set([('Content-Type', 'text/plain')]),
                         set(self._headers))

    def test_respond_ok(self):
        self._req.respond()
        self.assertEqual([], self._headers)
        self.assertEqual(HTTP_OK, self._status)

    def test_respond(self):
        self._req.nocache()
        self._req.respond(status=402, content_type='some/type',
                          headers=[('X-Foo', 'foo'), ('X-Bar', 'bar')])
        self.assertEqual(set([
            ('X-Foo', 'foo'),
            ('X-Bar', 'bar'),
            ('Content-Type', 'some/type'),
            ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
            ('Pragma', 'no-cache'),
            ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
            ]), set(self._headers))
        self.assertEqual(402, self._status)


class HTTPGitApplicationTestCase(TestCase):

    def setUp(self):
        super(HTTPGitApplicationTestCase, self).setUp()
        self._app = HTTPGitApplication('backend')

        self._environ = {
            'PATH_INFO': '/foo',
            'REQUEST_METHOD': 'GET',
            }

    def _test_handler(self, req, backend, mat):
        # tests interface used by all handlers
        self.assertEqual(self._environ, req.environ)
        self.assertEqual('backend', backend)
        self.assertEqual('/foo', mat.group(0))
        return 'output'

    def _add_handler(self, app):
        """Route the environ's request method + '/foo' to _test_handler."""
        req = self._environ['REQUEST_METHOD']
        app.services = {
            (req, re.compile('/foo$')): self._test_handler,
            }

    def test_call(self):
        self._add_handler(self._app)
        self.assertEqual('output', self._app(self._environ, None))

    def test_fallback_app(self):
        def test_app(environ, start_response):
            return 'output'

        app = HTTPGitApplication('backend', fallback_app=test_app)
        self.assertEqual('output', app(self._environ, None))


class GunzipTestCase(HTTPGitApplicationTestCase):
    """TestCase for testing the GunzipFilter, ensuring the wsgi.input
    is correctly decompressed and headers are corrected.

    This docstring is also used as the payload that the tests compress, so
    the tests require it to stay longer than its gzip-compressed form.
    """
    example_text = __doc__

    def setUp(self):
        super(GunzipTestCase, self).setUp()
        self._app = GunzipFilter(self._app)
        self._environ['HTTP_CONTENT_ENCODING'] = 'gzip'
        self._environ['REQUEST_METHOD'] = 'POST'

    def _get_zstream(self, text):
        """Gzip-compress text.

        :return: Tuple of (stream rewound to the start, compressed length).
        """
        zstream = StringIO()
        zfile = gzip.GzipFile(fileobj=zstream, mode='w')
        zfile.write(text)
        zfile.close()
        zlength = zstream.tell()
        zstream.seek(0)
        return zstream, zlength

    def _test_call(self, orig, zstream, zlength):
        """Send zstream through the filter and check the app sees orig.

        :param orig: The uncompressed text.
        :param zstream: File-like object yielding the gzipped text.
        :param zlength: Length of the gzipped text, sent as CONTENT_LENGTH.
        """
        self._add_handler(self._app.app)
        self.assertLess(zlength, len(orig))
        self.assertEqual(self._environ['HTTP_CONTENT_ENCODING'], 'gzip')
        self._environ['CONTENT_LENGTH'] = zlength
        self._environ['wsgi.input'] = zstream
        app_output = self._app(self._environ, None)
        # The filter must hand back whatever the wrapped application
        # returned (the value produced by _test_handler).
        self.assertEqual('output', app_output)
        buf = self._environ['wsgi.input']
        self.assertIsNot(buf, zstream)
        buf.seek(0)
        self.assertEqual(orig, buf.read())
        self.assertIs(None, self._environ.get('CONTENT_LENGTH'))
        self.assertNotIn('HTTP_CONTENT_ENCODING', self._environ)

    def test_call(self):
        zstream, zlength = self._get_zstream(self.example_text)
        self._test_call(self.example_text, zstream, zlength)

    def test_call_no_seek(self):
        """
        This ensures that the gunzipping code doesn't require any methods on
        'wsgi.input' except for '.read()'.  (In particular, it shouldn't
        require '.seek()'. See https://github.com/jelmer/dulwich/issues/140.)
        """
        class MinimalistWSGIInputStream(object):
            """Input stream that offers nothing but read(howmuch)."""

            def __init__(self, data):
                self.data = data
                self.pos = 0

            def read(self, howmuch):
                start = self.pos
                end = self.pos + howmuch
                if start >= len(self.data):
                    return ''
                self.pos = end
                return self.data[start:end]

        zstream, zlength = self._get_zstream(self.example_text)
        self._test_call(self.example_text,
                        MinimalistWSGIInputStream(zstream.read()), zlength)


# ---------------------------------------------------------------------------
# Header of dulwich/web.py, whose module body follows.
# ---------------------------------------------------------------------------
# web.py -- WSGI smart-http server
# Copyright (C) 2010 Google, Inc.
# Copyright (C) 2012 Jelmer Vernooij
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# or (at your option) any later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.
"""HTTP server for dulwich that implements the git smart HTTP protocol."""

from cStringIO import StringIO
import shutil
import tempfile
import gzip
import os
import re
import sys
import time
from urlparse import parse_qs

from dulwich import log_utils
from dulwich.protocol import (
    ReceivableProtocol,
    )
from dulwich.repo import (
    Repo,
    )
from dulwich.server import (
    DictBackend,
    DEFAULT_HANDLERS,
    generate_info_refs,
    generate_objects_info_packs,
    )


logger = log_utils.getLogger(__name__)


# HTTP status lines
HTTP_OK = '200 OK'
HTTP_NOT_FOUND = '404 Not Found'
HTTP_FORBIDDEN = '403 Forbidden'
HTTP_ERROR = '500 Internal Server Error'


def date_time_string(timestamp=None):
    """Format a timestamp as an HTTP date, e.g. for Date/Expires headers.

    :param timestamp: Seconds since the epoch; defaults to the current time.
    :return: A string such as 'Fri, 01 Jan 1980 00:00:00 GMT'.
    """
    # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the
    # Python 2.6.5 standard library, following modifications:
    #  - Made a global rather than an instance method.
    #  - weekdayname and monthname are renamed and locals rather than class
    #    variables.
    # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    months = [None,
              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timestamp is None:
        timestamp = time.time()
    # Only the first seven fields of the time tuple are needed.
    year, month, day, hh, mm, ss, wd = time.gmtime(timestamp)[:7]
    # The zone designator of an HTTP date is the literal 'GMT'.
    return '%s, %02d %3s %4d %02d:%02d:%02d GMT' % (
        weekdays[wd], day, months[month], year, hh, mm, ss)


def url_prefix(mat):
    """Extract the URL prefix from a regex match.

    :param mat: A regex match object.
    :returns: The URL prefix, defined as the text before the match in the
        original string. Normalized to start with one leading slash and end
        with zero.
    """
    return '/' + mat.string[:mat.start()].strip('/')


def get_repo(backend, mat):
    """Get a Repo instance for the given backend and URL regex match."""
    return backend.open_repository(url_prefix(mat))


def send_file(req, f, content_type):
    """Send a file-like object to the request output.

    :param req: The HTTPGitRequest object to send output to.
    :param f: An open file-like object to send; will be closed.
    :param content_type: The MIME type for the file.
    :return: Iterator over the contents of the file, as chunks.
    """
    if f is None:
        yield req.not_found('File not found')
        return
    try:
        req.respond(HTTP_OK, content_type)
        while True:
            data = f.read(10240)
            if not data:
                break
            yield data
        f.close()
    except IOError:
        f.close()
        yield req.error('Error reading file')
    except:
        # Deliberately bare: close the file on any other failure (this also
        # covers the generator being closed early), then re-raise untouched.
        f.close()
        raise


def _url_to_path(url):
    """Convert a '/'-separated URL path to use the local path separator."""
    return url.replace('/', os.path.sep)


def get_text_file(req, backend, mat):
    """Serve the named file matched by mat as uncached text/plain."""
    req.nocache()
    path = _url_to_path(mat.group())
    logger.info('Sending plain text file %s', path)
    return send_file(req, get_repo(backend, mat).get_named_file(path),
                     'text/plain')


def get_loose_object(req, backend, mat):
    """Serve a single loose object in its legacy on-disk format.

    :param mat: Match whose groups 1 and 2 concatenate to the object's SHA.
    :return: Iterator yielding the object data, or an error message if the
        object is missing or cannot be read.
    """
    sha = mat.group(1) + mat.group(2)
    logger.info('Sending loose object %s', sha)
    object_store = get_repo(backend, mat).object_store
    if not object_store.contains_loose(sha):
        yield req.not_found('Object not found')
        return
    try:
        data = object_store[sha].as_legacy_object()
    except IOError:
        yield req.error('Error reading object')
        return
    req.cache_forever()
    req.respond(HTTP_OK, 'application/x-git-loose-object')
    yield data


def get_pack_file(req, backend, mat):
    """Serve the pack file matched by mat, cacheable forever."""
    req.cache_forever()
    path = _url_to_path(mat.group())
    logger.info('Sending pack file %s', path)
    return send_file(req, get_repo(backend, mat).get_named_file(path),
                     'application/x-git-packed-objects')


def get_idx_file(req, backend, mat):
    """Serve the pack index file matched by mat, cacheable forever."""
    req.cache_forever()
    path = _url_to_path(mat.group())
    logger.info('Sending pack index file %s', path)
    return send_file(req, get_repo(backend, mat).get_named_file(path),
                     'application/x-git-packed-objects-toc')


def get_info_refs(req, backend, mat):
    """Serve info/refs, either as a smart advertisement or as a dumb list.

    If the query string names a service and the request is not in dumb mode,
    the matching handler is run with advertise_refs=True and writes its
    output through the WSGI write callable. Otherwise the refs are yielded
    as plain text. An unknown service yields a 403 message.
    """
    params = parse_qs(req.environ['QUERY_STRING'])
    service = params.get('service', [None])[0]
    if service and not req.dumb:
        handler_cls = req.handlers.get(service, None)
        if handler_cls is None:
            yield req.forbidden('Unsupported service %s' % service)
            return
        req.nocache()
        write = req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
        # The advertisement takes no request body, so the handler is given
        # an empty input stream.
        proto = ReceivableProtocol(StringIO().read, write)
        handler = handler_cls(backend, [url_prefix(mat)], proto,
                              http_req=req, advertise_refs=True)
        handler.proto.write_pkt_line('# service=%s\n' % service)
        handler.proto.write_pkt_line(None)
        handler.handle()
    else:
        # non-smart fallback
        # TODO: select_getanyfile() (see http-backend.c)
        req.nocache()
        req.respond(HTTP_OK, 'text/plain')
        logger.info('Emulating dumb info/refs')
        repo = get_repo(backend, mat)
        for text in generate_info_refs(repo):
            yield text


def get_info_packs(req, backend, mat):
    """Serve objects/info/packs as uncached text/plain."""
    req.nocache()
    req.respond(HTTP_OK, 'text/plain')
    logger.info('Emulating dumb info/packs')
    return generate_objects_info_packs(get_repo(backend, mat))


class _LengthLimitedFile(object):
    """Wrapper class to limit the length of reads from a file-like object.

    This is used to ensure EOF is read from the wsgi.input object once
    Content-Length bytes are read. This behavior is required by the WSGI spec
    but not implemented in wsgiref as of 2.5.
    """

    def __init__(self, input, max_bytes):
        self._input = input
        self._bytes_avail = max_bytes

    def read(self, size=-1):
        """Read at most size bytes, never going past the configured limit.

        :param size: Maximum number of bytes to read; None or a negative
            value means everything still allowed.
        :return: The data read, or '' once the limit has been reached.
        """
        if self._bytes_avail <= 0:
            return ''
        if size is None or size < 0 or size > self._bytes_avail:
            size = self._bytes_avail
        data = self._input.read(size)
        # Charge only what was actually delivered, so a short read from the
        # underlying stream does not eat into the remaining allowance.
        self._bytes_avail -= len(data)
        return data

    # TODO: support more methods as necessary


def handle_service_request(req, backend, mat):
    """Run the smart-protocol handler for the service named in the URL.

    The handler reads from the request's wsgi.input and writes through the
    WSGI write callable; an unknown service yields a 403 message instead.
    """
    service = mat.group().lstrip('/')
    logger.info('Handling service request for %s', service)
    handler_cls = req.handlers.get(service, None)
    if handler_cls is None:
        yield req.forbidden('Unsupported service %s' % service)
        return
    req.nocache()
    write = req.respond(HTTP_OK, 'application/x-%s-result' % service)
    proto = ReceivableProtocol(req.environ['wsgi.input'].read, write)
    handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req)
    handler.handle()


class HTTPGitRequest(object):
    """Class encapsulating the state of a single git HTTP request.

    :ivar environ: the WSGI environment for the request.
    """

    def __init__(self, environ, start_response, dumb=False, handlers=None):
        self.environ = environ
        self.dumb = dumb
        self.handlers = handlers
        self._start_response = start_response
        self._cache_headers = []
        self._headers = []

    def add_header(self, name, value):
        """Add a header to the response."""
        self._headers.append((name, value))

    def respond(self, status=HTTP_OK, content_type=None, headers=None):
        """Begin a response with the given status and other headers.

        :return: Whatever start_response returns (per WSGI, the write
            callable).
        """
        if headers:
            self._headers.extend(headers)
        if content_type:
            self._headers.append(('Content-Type', content_type))
        self._headers.extend(self._cache_headers)

        return self._start_response(status, self._headers)

    def not_found(self, message):
        """Begin a HTTP 404 response and return the text of a message."""
        self._cache_headers = []
        logger.info('Not found: %s', message)
        self.respond(HTTP_NOT_FOUND, 'text/plain')
        return message

    def forbidden(self, message):
        """Begin a HTTP 403 response and return the text of a message."""
        self._cache_headers = []
        logger.info('Forbidden: %s', message)
        self.respond(HTTP_FORBIDDEN, 'text/plain')
        return message

    def error(self, message):
        """Begin a HTTP 500 response and return the text of a message."""
        self._cache_headers = []
        logger.error('Error: %s', message)
        self.respond(HTTP_ERROR, 'text/plain')
        return message

    def nocache(self):
        """Set the response to never be cached by the client."""
        self._cache_headers = [
            ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
            ('Pragma', 'no-cache'),
            ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
            ]

    def cache_forever(self):
        """Set the response to be cached forever by the client."""
        now = time.time()
        self._cache_headers = [
            ('Date', date_time_string(now)),
            ('Expires', date_time_string(now + 31536000)),
            ('Cache-Control', 'public, max-age=31536000'),
            ]


class HTTPGitApplication(object):
    """Class encapsulating the state of a git WSGI application.

    :ivar backend: the Backend object backing this application
    """

    # Maps (HTTP method, compiled path regex) to the handler to call.
    services = {
        ('GET', re.compile('/HEAD$')): get_text_file,
        ('GET', re.compile('/info/refs$')): get_info_refs,
        ('GET', re.compile('/objects/info/alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/packs$')): get_info_packs,
        ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')):
            get_loose_object,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')):
            get_pack_file,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')):
            get_idx_file,

        ('POST', re.compile('/git-upload-pack$')): handle_service_request,
        ('POST', re.compile('/git-receive-pack$')): handle_service_request,
        }

    def __init__(self, backend, dumb=False, handlers=None, fallback_app=None):
        """Create the application.

        :param backend: Backend passed to every handler.
        :param dumb: Passed on to each HTTPGitRequest.
        :param handlers: Optional dict of service handlers, merged over
            DEFAULT_HANDLERS.
        :param fallback_app: Optional WSGI application called when no
            service matches the request.
        """
        self.backend = backend
        self.dumb = dumb
        self.handlers = dict(DEFAULT_HANDLERS)
        self.fallback_app = fallback_app
        if handlers is not None:
            self.handlers.update(handlers)

    def __call__(self, environ, start_response):
        """Dispatch a WSGI request to the first matching service handler."""
        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD']
        req = HTTPGitRequest(environ, start_response, dumb=self.dumb,
                             handlers=self.handlers)
        # environ['QUERY_STRING'] has qs args
        handler = None
        for (smethod, spath), candidate in self.services.iteritems():
            if smethod != method:
                continue
            mat = spath.search(path)
            if mat:
                handler = candidate
                break

        if handler is None:
            if self.fallback_app is not None:
                return self.fallback_app(environ, start_response)
            else:
                # Wrap the message in a list: a WSGI server iterates the
                # return value, and a bare string would be sent one
                # character at a time.
                return [req.not_found('Sorry, that method is not supported')]

        return handler(req, self.backend, mat)


def _content_length(environ):
    """Return CONTENT_LENGTH from environ as a non-negative int, or None.

    None is returned when the value is missing, empty or not a number.
    """
    try:
        length = int(environ.get('CONTENT_LENGTH', ''))
    except (TypeError, ValueError):
        return None
    if length < 0:
        return None
    return length


def _has_working_seek_and_tell(stream):
    """Check that stream has 'seek' and that calling 'tell()' succeeds.

    Checking for the attribute alone is not enough: a stream may define the
    method and still raise when it is called.
    """
    try:
        stream.tell()
    except (AttributeError, IOError, NotImplementedError):
        return False
    return hasattr(stream, 'seek')


class GunzipFilter(object):
    """WSGI middleware that unzips gzip-encoded requests before
    passing on to the underlying application.
    """

    def __init__(self, application):
        self.app = application

    def __call__(self, environ, start_response):
        if environ.get('HTTP_CONTENT_ENCODING', '') == 'gzip':
            wsgi_input = environ['wsgi.input']
            if not _has_working_seek_and_tell(wsgi_input):
                # The gzip implementation in the standard library of Python
                # 2.x requires the '.seek()' and '.tell()' methods to be
                # available on the input stream.  Read the data into a
                # temporary file to work around this limitation.
                source = wsgi_input
                limit = _content_length(environ)
                if limit is not None:
                    # Stop after Content-Length bytes; a server that does
                    # not signal EOF there would otherwise keep the copy
                    # below waiting for more data.
                    source = _LengthLimitedFile(source, limit)
                wsgi_input = tempfile.SpooledTemporaryFile(16 * 1024 * 1024)
                shutil.copyfileobj(source, wsgi_input)
                wsgi_input.seek(0)

            environ['wsgi.input'] = gzip.GzipFile(
                filename=None, fileobj=wsgi_input, mode='r')
            del environ['HTTP_CONTENT_ENCODING']
            # The compressed length says nothing about the decompressed
            # stream the application will read.
            environ.pop('CONTENT_LENGTH', None)

        return self.app(environ, start_response)


class LimitedInputFilter(object):
    """WSGI middleware that limits the input length of a request to that
    specified in Content-Length.
    """

    def __init__(self, application):
        self.app = application

    def __call__(self, environ, start_response):
        # This is not necessary if this app is run from a conforming WSGI
        # server. Unfortunately, there's no way to tell that at this point.
        # TODO: git may use HTTP/1.1 chunked encoding instead of specifying
        # content-length
        content_length = environ.get('CONTENT_LENGTH', '')
        if content_length:
            environ['wsgi.input'] = _LengthLimitedFile(
                environ['wsgi.input'], int(content_length))
        return self.app(environ, start_response)


def make_wsgi_chain(*args, **kwargs):
    """Factory function to create an instance of HTTPGitApplication,
    correctly wrapped with needed middleware.

    All arguments are passed through to HTTPGitApplication.
    """
    app = HTTPGitApplication(*args, **kwargs)
    wrapped_app = GunzipFilter(LimitedInputFilter(app))
    return wrapped_app


# The reference server implementation is based on wsgiref, which is not
# distributed with python 2.4. If wsgiref is not present, users will not be
# able to use the HTTP server without a little extra work.
try:
    from wsgiref.simple_server import (
        WSGIRequestHandler,
        ServerHandler,
        WSGIServer,
        make_server,
        )

    class ServerHandlerLogger(ServerHandler):
        """ServerHandler that uses dulwich's logger for logging exceptions."""

        def log_exception(self, exc_info):
            logger.exception('Exception happened during processing of request',
                             exc_info=exc_info)

        def log_message(self, format, *args):
            logger.info(format, *args)

        def log_error(self, *args):
            logger.error(*args)

    class WSGIRequestHandlerLogger(WSGIRequestHandler):
        """WSGIRequestHandler that uses dulwich's logger for logging exceptions."""

        def log_exception(self, exc_info):
            logger.exception('Exception happened during processing of request',
                             exc_info=exc_info)

        def log_message(self, format, *args):
            logger.info(format, *args)

        def log_error(self, *args):
            logger.error(*args)

        def handle(self):
            """Handle a single HTTP request"""

            self.raw_requestline = self.rfile.readline()
            if not self.parse_request():  # An error code has been sent, just exit
                return

            handler = ServerHandlerLogger(
                self.rfile, self.wfile, self.get_stderr(), self.get_environ()
            )
            handler.request_handler = self      # backpointer for logging
            handler.run(self.server.get_app())

    class WSGIServerLogger(WSGIServer):
        """WSGIServer that reports request errors through dulwich's logger."""

        def handle_error(self, request, client_address):
            """Handle an error. """
            # Let the logging module do the formatting.  The address is
            # converted with str() first because it may be a tuple, which
            # must not be unpacked as separate format arguments.
            logger.exception(
                'Exception happened during processing of request from %s',
                str(client_address))

    def main(argv=sys.argv):
        """Entry point for starting an HTTP git server."""
        if len(argv) > 1:
            gitdir = argv[1]
        else:
            gitdir = os.getcwd()

        # TODO: allow serving on other addresses/ports via command-line flag
        listen_addr = ''
        port = 8000

        log_utils.default_logging_config()
        backend = DictBackend({'/': Repo(gitdir)})
        app = make_wsgi_chain(backend)
        server = make_server(listen_addr, port, app,
                             handler_class=WSGIRequestHandlerLogger,
                             server_class=WSGIServerLogger)
        logger.info('Listening for HTTP connections on %s:%d', listen_addr,
                    port)
        server.serve_forever()

except ImportError:
    # No wsgiref found; don't provide the reference functionality, but leave
    # the rest of the WSGI-based implementation.
    def main(argv=sys.argv):
        """Stub entry point for failing to start a server without wsgiref."""
        sys.stderr.write(
            'Sorry, the wsgiref module is required for dul-web.\n')
        sys.exit(1)


if __name__ == '__main__':
    main()