# ---------------------------------------------------------------------------
# dulwich/archive.py
# (from: diff --git a/dulwich/archive.py b/dulwich/archive.py,
#  index b7164259..d3ab9dfc 100644 -- the '+' side of the patch is applied)
# ---------------------------------------------------------------------------
# archive.py -- Creating an archive from a tarball
# Copyright (C) 2015 Jonas Haag
# Copyright (C) 2015 Jelmer Vernooij
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# or (at your option) a later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.

"""Generates tarballs for Git trees.

"""

import posixpath
import stat
import tarfile
from io import BytesIO
from contextlib import closing


class ListBytesIO(object):
    """Turn a list of bytestrings into a file-like object.

    This is similar to creating a `BytesIO` from a concatenation of the
    bytestring list, but saves memory by NOT creating one giant bytestring
    first::

        BytesIO(b''.join(list_of_bytestrings)) =~= ListBytesIO(
            list_of_bytestrings)
    """

    def __init__(self, contents):
        """Wrap ``contents`` (a list of bytestrings) without copying it."""
        self.contents = contents
        # Read position as (index of current chunk, offset inside that chunk).
        self.pos = (0, 0)

    def read(self, maxbytes=None):
        """Read up to ``maxbytes`` bytes and advance the read position.

        :param maxbytes: Maximum number of bytes to return. ``None`` (the
            default) or a negative number reads everything that is left.
        :return: A bytestring; empty once all chunks have been consumed
        """
        # ``None < 0`` raises TypeError on Python 3, so test for None first.
        if maxbytes is None or maxbytes < 0:
            maxbytes = float('inf')

        buf = []
        chunk, cursor = self.pos

        while chunk < len(self.contents):
            remaining = len(self.contents[chunk]) - cursor
            if maxbytes < remaining:
                # The request ends inside this chunk: take a slice and stop.
                buf.append(self.contents[chunk][cursor:cursor+maxbytes])
                cursor += maxbytes
                self.pos = (chunk, cursor)
                break
            else:
                # Consume the rest of this chunk and move on to the next one.
                buf.append(self.contents[chunk][cursor:])
                maxbytes -= remaining
                chunk += 1
                cursor = 0
                self.pos = (chunk, cursor)
        return b''.join(buf)


def tar_stream(store, tree, mtime, format=''):
    """Generate a tar stream for the contents of a Git tree.

    Returns a generator that lazily assembles a tar archive, yielding it in
    pieces (bytestrings). To obtain the complete archive, simply concatenate
    these chunks. The archive is only compressed when ``format`` names a
    compression method.

    :param store: Object store to retrieve objects from
    :param tree: Tree object for the tree root
    :param mtime: UNIX timestamp that is assigned as the modification time for
        all files
    :param format: Optional compression format for tarball; it is appended to
        the ``"w:"`` mode passed to ``tarfile.open`` (e.g. ``'gz'``). The
        default ``''`` produces an uncompressed tar.
    :return: Bytestrings
    :raise UnicodeDecodeError: If a path in the tree is not pure ASCII
    """
    buf = BytesIO()
    with closing(tarfile.open(None, "w:%s" % format, buf)) as tar:
        for entry_abspath, entry in _walk_tree(store, tree):
            try:
                blob = store[entry.sha]
            except KeyError:
                # Entry probably refers to a submodule, which we don't yet
                # support.
                continue
            data = ListBytesIO(blob.chunked)

            info = tarfile.TarInfo()
            # tarfile only works with ascii.
            info.name = entry_abspath.decode('ascii')
            info.size = blob.raw_length()
            info.mode = entry.mode
            info.mtime = mtime

            tar.addfile(info, data)
            # Hand out what has been written so far, then reuse the buffer so
            # that memory use stays bounded by a single archive member.
            yield buf.getvalue()
            buf.truncate(0)
            buf.seek(0)
    # Closing the tar file writes the end-of-archive blocks into ``buf``.
    yield buf.getvalue()


def _walk_tree(store, tree, root=b''):
    """Recursively walk a dulwich Tree, yielding tuples of
    (absolute path, TreeEntry) along the way.

    :param store: Object store used to look up sub-trees by ``entry.sha``
    :param tree: Tree whose ``iteritems()`` yields the entries to walk
    :param root: Path (bytes) that is prepended to every entry path
    :return: Iterator over ``(path, entry)`` for every non-directory entry
    """
    for entry in tree.iteritems():
        entry_abspath = posixpath.join(root, entry.path)
        if stat.S_ISDIR(entry.mode):
            # Directories are not yielded themselves; descend into them.
            for item in _walk_tree(store, store[entry.sha], entry_abspath):
                yield item
        else:
            yield (entry_abspath, entry)


# ---------------------------------------------------------------------------
# dulwich/objectspec.py
# (from: diff --git a/dulwich/objectspec.py b/dulwich/objectspec.py,
#  index 880c6748..3fa72068 100644 -- the '+' side of the patch is applied)
# ---------------------------------------------------------------------------
# objectspec.py -- Object specification
# Copyright (C) 2014 Jelmer Vernooij
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License or (at your option) a later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.

"""Object specification."""


def to_bytes(text):
    """Return ``text`` as bytes.

    Anything that has an ``encode`` method is encoded as ASCII; every other
    value is returned unchanged.

    :param text: A text or byte string
    :return: The ASCII-encoded value, or ``text`` itself
    :raise UnicodeEncodeError: If ``text`` contains non-ASCII characters
    """
    if getattr(text, "encode", None) is not None:
        text = text.encode('ascii')
    return text


def parse_object(repo, objectish):
    """Parse a string referring to an object.

    :param repo: A `Repo` object
    :param objectish: A string referring to an object
    :return: A git object
    :raise KeyError: If the object can not be found
    """
    objectish = to_bytes(objectish)
    return repo[objectish]


def parse_ref(container, refspec):
    """Parse a string referring to a reference.

    The name is tried as given first and then with ``refs/heads/`` prepended.

    :param container: A RefsContainer object
    :param refspec: A string referring to a ref
    :return: A ref
    :raise KeyError: If the ref can not be found
    """
    refspec = to_bytes(refspec)
    for ref in [refspec, b"refs/heads/" + refspec]:
        if ref in container:
            return ref
    raise KeyError(refspec)


def parse_reftuple(lh_container, rh_container, refspec):
    """Parse a reftuple spec.

    A spec has the form ``[+]<lh>[:<rh>]``. A leading ``+`` sets the force
    flag, an empty side is returned as ``None`` and a spec without ``:`` uses
    the same name for both sides. A right-hand name that is not found in
    ``rh_container`` and contains no ``/`` is prefixed with ``refs/heads/``.

    :param lh_container: A RefsContainer object
    :param rh_container: A RefsContainer object
    :param refspec: A string
    :return: A tuple ``(lh, rh, force)`` with left and right ref
    :raise KeyError: If the left-hand ref can not be found
    """
    # Convert before looking at the contents: ``str.startswith(b"+")`` raises
    # TypeError on Python 3.
    refspec = to_bytes(refspec)
    force = refspec.startswith(b"+")
    if force:
        refspec = refspec[1:]
    if b":" in refspec:
        (lh, rh) = refspec.split(b":")
    else:
        lh = rh = refspec
    # Each side is checked on its own; an empty left side (":branch") must
    # not be looked up, and an empty right side must not discard the left.
    if lh == b"":
        lh = None
    else:
        lh = parse_ref(lh_container, lh)
    if rh == b"":
        rh = None
    else:
        try:
            rh = parse_ref(rh_container, rh)
        except KeyError:
            # TODO: check force?
            if b"/" not in rh:
                rh = b"refs/heads/" + rh
    return (lh, rh, force)


def parse_reftuples(lh_container, rh_container, refspecs):
    """Parse a list of reftuple specs to a list of reftuples.

    :param lh_container: A RefsContainer object
    :param rh_container: A RefsContainer object
    :param refspecs: A list of refspecs or a string
    :return: A list of refs
    :raise KeyError: If one of the refs can not be found
    """
    if not isinstance(refspecs, list):
        refspecs = [refspecs]
    # TODO: Support * in refspecs
    return [parse_reftuple(lh_container, rh_container, refspec)
            for refspec in refspecs]


def parse_refs(container, refspecs):
    """Parse a list of refspecs to a list of refs.

    :param container: A RefsContainer object
    :param refspecs: A list of refspecs or a string
    :return: A list of refs
    :raise KeyError: If one of the refs can not be found
    """
    # TODO: Support * in refspecs
    if not isinstance(refspecs, list):
        refspecs = [refspecs]
    return [parse_ref(container, refspec) for refspec in refspecs]


def parse_commit_range(repo, committishs):
    """Parse a string referring to a range of commits.

    :param repo: A `Repo` object
    :param committishs: A string referring to a range of commits.
    :return: An iterator over `Commit` objects
    :raise KeyError: When the reference commits can not be found
    :raise ValueError: If the range can not be parsed
    """
    committishs = to_bytes(committishs)
    # TODO(jelmer): Support more than a single commit..
    return iter([parse_commit(repo, committishs)])


def parse_commit(repo, committish):
    """Parse a string referring to a single commit.

    :param repo: A `Repo` object
    :param committish: A string referring to a single commit.
    :return: A Commit object
    :raise KeyError: When the reference commits can not be found
    :raise ValueError: If the range can not be parsed
    """
    committish = to_bytes(committish)
    return repo[committish]  # For now..


# TODO: parse_path_in_tree(), which handles e.g. v1.0:Documentation


# ---------------------------------------------------------------------------
# dulwich/tests/test_archive.py
# (from: diff --git a/dulwich/tests/test_archive.py
#  b/dulwich/tests/test_archive.py, index 85778a94..0186e76e 100644)
# ---------------------------------------------------------------------------
# test_archive.py -- tests for archive
# Copyright (C) 2015 Jelmer Vernooij
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License or (at your option) a later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.

"""Tests for archive support."""

from io import BytesIO
import tarfile

from dulwich.archive import tar_stream
from dulwich.object_store import (
    MemoryObjectStore,
    )
from dulwich.objects import (
    Blob,
    Tree,
    )
from dulwich.tests import (
    TestCase,
    )
from dulwich.tests.utils import (
    build_commit_graph,
    )


class ArchiveTests(TestCase):
    """Tests for `dulwich.archive.tar_stream`."""

    def test_empty(self):
        """The tree of a commit built by build_commit_graph gives no names."""
        store = MemoryObjectStore()
        c1, c2, c3 = build_commit_graph(store, [[1], [2, 1], [3, 1, 2]])
        tree = store[c3.tree]
        # tar_stream yields bytestrings, so the pieces are joined with b''.
        stream = b''.join(tar_stream(store, tree, 10))
        out = BytesIO(stream)
        tf = tarfile.TarFile(fileobj=out)
        self.addCleanup(tf.close)
        self.assertEqual([], tf.getnames())

    def test_simple(self):
        """A tree with a single blob yields a single archive member."""
        store = MemoryObjectStore()
        b1 = Blob.from_string(b"somedata")
        store.add_object(b1)
        t1 = Tree()
        t1.add(b"somename", 0o100644, b1.id)
        store.add_object(t1)
        stream = b''.join(tar_stream(store, t1, 10))
        out = BytesIO(stream)
        tf = tarfile.TarFile(fileobj=out)
        self.addCleanup(tf.close)
        self.assertEqual(["somename"], tf.getnames())