class ChunkedBytesIO(object):
    """Turn a list of bytestrings into a file-like object.

    This is similar to creating a `BytesIO` from a concatenation of the
    bytestring list, but saves memory by NOT creating one giant bytestring
    first::

        BytesIO(b''.join(list_of_bytestrings)) =~= ChunkedBytesIO(
            list_of_bytestrings)

    Only ``read`` is implemented; the object is read-only and forward-only.
    """

    def __init__(self, contents):
        """Wrap ``contents`` without copying it.

        :param contents: Sequence of bytestrings, indexed in order
        """
        self.contents = contents
        # Read position as (index of current chunk, offset inside that chunk).
        self.pos = (0, 0)

    def read(self, maxbytes=None):
        """Read up to ``maxbytes`` bytes, advancing the read position.

        :param maxbytes: Maximum number of bytes to return. ``None`` (the
            default) or a negative value means "read everything that is
            left".
        :return: A bytestring; empty once all chunks have been consumed
        """
        # ``None < 0`` raises TypeError on Python 3, so the default has to be
        # checked explicitly before any numeric comparison.
        unlimited = maxbytes is None or maxbytes < 0

        pieces = []
        chunk, cursor = self.pos

        while chunk < len(self.contents):
            data = self.contents[chunk]
            available = len(data) - cursor
            if not unlimited and maxbytes < available:
                # The request ends inside this chunk: take a slice and stop.
                pieces.append(data[cursor:cursor + maxbytes])
                cursor += maxbytes
                break
            # Consume the rest of this chunk and move on to the next one.
            pieces.append(data[cursor:])
            if not unlimited:
                maxbytes -= available
            chunk += 1
            cursor = 0

        self.pos = (chunk, cursor)
        return b''.join(pieces)
# The gzip header file modification time is not currenctly - # accessible from the tarfile API, see: https://bugs.python.org/issue31526 + # accessible from the tarfile API, see: + # https://bugs.python.org/issue31526 buf.seek(0) assert buf.read(2) == b'\x1f\x8b', 'Invalid gzip header' buf.seek(4) buf.write(struct.pack(' # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. 
# """Tests for archive support.""" from io import BytesIO import tarfile import struct from unittest import skipUnless from dulwich.archive import tar_stream from dulwich.object_store import ( MemoryObjectStore, ) from dulwich.objects import ( Blob, Tree, ) from dulwich.tests import ( TestCase, ) from dulwich.tests.utils import ( build_commit_graph, ) try: from mock import patch except ImportError: patch = None class ArchiveTests(TestCase): def test_empty(self): store = MemoryObjectStore() c1, c2, c3 = build_commit_graph(store, [[1], [2, 1], [3, 1, 2]]) tree = store[c3.tree] stream = b''.join(tar_stream(store, tree, 10)) out = BytesIO(stream) tf = tarfile.TarFile(fileobj=out) self.addCleanup(tf.close) self.assertEqual([], tf.getnames()) def _get_example_tar_stream(self, *tar_stream_args, **tar_stream_kwargs): store = MemoryObjectStore() b1 = Blob.from_string(b"somedata") store.add_object(b1) t1 = Tree() t1.add(b"somename", 0o100644, b1.id) store.add_object(t1) - stream = b''.join(tar_stream(store, t1, *tar_stream_args, **tar_stream_kwargs)) + stream = b''.join( + tar_stream(store, t1, *tar_stream_args, **tar_stream_kwargs)) return BytesIO(stream) def test_simple(self): stream = self._get_example_tar_stream(mtime=0) tf = tarfile.TarFile(fileobj=stream) self.addCleanup(tf.close) self.assertEqual(["somename"], tf.getnames()) def test_gzip_mtime(self): stream = self._get_example_tar_stream(mtime=1234, format='gz') expected_mtime = struct.pack('