diff --git a/dulwich/tests/test_walk.py b/dulwich/tests/test_walk.py index c441e5ee..5627ee90 100644 --- a/dulwich/tests/test_walk.py +++ b/dulwich/tests/test_walk.py @@ -1,417 +1,543 @@ # test_walk.py -- Tests for commit walking functionality. # Copyright (C) 2010 Google, Inc. # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """Tests for commit walking functionality.""" from itertools import ( permutations, ) from dulwich.diff_tree import ( + CHANGE_ADD, CHANGE_MODIFY, CHANGE_RENAME, TreeChange, RenameDetector, ) from dulwich.errors import ( MissingCommitError, ) from dulwich.object_store import ( MemoryObjectStore, ) from dulwich.objects import ( Commit, Blob, ) from dulwich.walk import ( ORDER_TOPO, WalkEntry, Walker, _topo_reorder ) from dulwich.tests import TestCase from dulwich.tests.utils import ( F, make_object, build_commit_graph, ) class TestWalkEntry(object): def __init__(self, commit, changes): self.commit = commit self.changes = changes def __repr__(self): return '' % ( self.commit.id, self.changes) def __eq__(self, other): if not isinstance(other, WalkEntry) or self.commit != other.commit: return False if self.changes is None: return True return self.changes == other.changes() class WalkerTest(TestCase): def setUp(self): super(WalkerTest, self).setUp() self.store = MemoryObjectStore() def make_commits(self, commit_spec, **kwargs): times = kwargs.pop('times', []) attrs = kwargs.pop('attrs', {}) for i, t in enumerate(times): attrs.setdefault(i + 1, {})['commit_time'] = t return build_commit_graph(self.store, commit_spec, attrs=attrs, **kwargs) def make_linear_commits(self, num_commits, **kwargs): commit_spec = [] for i in range(1, num_commits + 1): c = [i] if i > 1: c.append(i - 1) commit_spec.append(c) return self.make_commits(commit_spec, **kwargs) def assertWalkYields(self, expected, *args, **kwargs): walker = Walker(self.store, *args, **kwargs) expected = list(expected) for i, entry in enumerate(expected): if isinstance(entry, Commit): expected[i] = TestWalkEntry(entry, None) actual = list(walker) self.assertEqual(expected, actual) def test_linear(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([c1], [c1.id]) self.assertWalkYields([c2, c1], [c2.id]) self.assertWalkYields([c3, c2, c1], [c3.id]) self.assertWalkYields([c3, c2, c1], [c3.id, c1.id]) self.assertWalkYields([c3, c2], [c3.id], exclude=[c1.id]) self.assertWalkYields([c3, c2], [c3.id, c1.id], exclude=[c1.id]) self.assertWalkYields([c3], [c3.id, c1.id], exclude=[c2.id]) def test_missing(self): cs = list(reversed(self.make_linear_commits(20))) self.assertWalkYields(cs, [cs[0].id]) # Exactly how close we can get to a missing commit depends on our # implementation (in particular the choice of _MAX_EXTRA_COMMITS), but # we should at least be able to walk some history in a broken repo. del self.store[cs[-1].id] for i in range(1, 11): self.assertWalkYields(cs[:i], [cs[0].id], max_entries=i) self.assertRaises(MissingCommitError, Walker, self.store, [cs[-1].id]) def test_branch(self): c1, x2, x3, y4 = self.make_commits([[1], [2, 1], [3, 2], [4, 1]]) self.assertWalkYields([x3, x2, c1], [x3.id]) self.assertWalkYields([y4, c1], [y4.id]) self.assertWalkYields([y4, x2, c1], [y4.id, x2.id]) self.assertWalkYields([y4, x2], [y4.id, x2.id], exclude=[c1.id]) self.assertWalkYields([y4, x3], [y4.id, x3.id], exclude=[x2.id]) self.assertWalkYields([y4], [y4.id], exclude=[x3.id]) self.assertWalkYields([x3, x2], [x3.id], exclude=[y4.id]) def test_merge(self): c1, c2, c3, c4 = self.make_commits([[1], [2, 1], [3, 1], [4, 2, 3]]) self.assertWalkYields([c4, c3, c2, c1], [c4.id]) self.assertWalkYields([c3, c1], [c3.id]) self.assertWalkYields([c2, c1], [c2.id]) self.assertWalkYields([c4, c3], [c4.id], exclude=[c2.id]) self.assertWalkYields([c4, c2], [c4.id], exclude=[c3.id]) def test_reverse(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([c1, c2, c3], [c3.id], reverse=True) def test_max_entries(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([c3, c2, c1], [c3.id], max_entries=3) self.assertWalkYields([c3, c2], [c3.id], max_entries=2) self.assertWalkYields([c3], [c3.id], max_entries=1) def test_reverse_after_max_entries(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([c1, c2, c3], [c3.id], max_entries=3, reverse=True) self.assertWalkYields([c2, c3], [c3.id], max_entries=2, reverse=True) self.assertWalkYields([c3], [c3.id], max_entries=1, reverse=True) def test_changes_one_parent(self): blob_a1 = make_object(Blob, data=b'a1') blob_a2 = make_object(Blob, data=b'a2') blob_b2 = make_object(Blob, data=b'b2') c1, c2 = self.make_linear_commits( 2, trees={1: [(b'a', blob_a1)], 2: [(b'a', blob_a2), (b'b', blob_b2)]}) e1 = TestWalkEntry(c1, [TreeChange.add((b'a', F, blob_a1.id))]) e2 = TestWalkEntry(c2, [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a2.id)), TreeChange.add((b'b', F, blob_b2.id))]) self.assertWalkYields([e2, e1], [c2.id]) def test_changes_multiple_parents(self): blob_a1 = make_object(Blob, data=b'a1') blob_b2 = make_object(Blob, data=b'b2') blob_a3 = make_object(Blob, data=b'a3') c1, c2, c3 = self.make_commits( [[1], [2], [3, 1, 2]], trees={1: [(b'a', blob_a1)], 2: [(b'b', blob_b2)], 3: [(b'a', blob_a3), (b'b', blob_b2)]}) # a is a modify/add conflict and b is not conflicted. changes = [[ TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a3.id)), TreeChange.add((b'a', F, blob_a3.id)), ]] self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id], exclude=[c1.id, c2.id]) def test_path_matches(self): walker = Walker(None, [], paths=[b'foo', b'bar', b'baz/quux']) self.assertTrue(walker._path_matches(b'foo')) self.assertTrue(walker._path_matches(b'foo/a')) self.assertTrue(walker._path_matches(b'foo/a/b')) self.assertTrue(walker._path_matches(b'bar')) self.assertTrue(walker._path_matches(b'baz/quux')) self.assertTrue(walker._path_matches(b'baz/quux/a')) self.assertFalse(walker._path_matches(None)) self.assertFalse(walker._path_matches(b'oops')) self.assertFalse(walker._path_matches(b'fool')) self.assertFalse(walker._path_matches(b'baz')) self.assertFalse(walker._path_matches(b'baz/quu')) def test_paths(self): blob_a1 = make_object(Blob, data=b'a1') blob_b2 = make_object(Blob, data=b'b2') blob_a3 = make_object(Blob, data=b'a3') blob_b3 = make_object(Blob, data=b'b3') c1, c2, c3 = self.make_linear_commits( 3, trees={1: [(b'a', blob_a1)], 2: [(b'a', blob_a1), (b'x/b', blob_b2)], 3: [(b'a', blob_a3), (b'x/b', blob_b3)]}) self.assertWalkYields([c3, c2, c1], [c3.id]) self.assertWalkYields([c3, c1], [c3.id], paths=[b'a']) self.assertWalkYields([c3, c2], [c3.id], paths=[b'x/b']) # All changes are included, not just for requested paths. changes = [ TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a3.id)), TreeChange(CHANGE_MODIFY, (b'x/b', F, blob_b2.id), (b'x/b', F, blob_b3.id)), ] self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id], max_entries=1, paths=[b'a']) def test_paths_subtree(self): blob_a = make_object(Blob, data=b'a') blob_b = make_object(Blob, data=b'b') c1, c2, c3 = self.make_linear_commits( 3, trees={1: [(b'x/a', blob_a)], 2: [(b'b', blob_b), (b'x/a', blob_a)], 3: [(b'b', blob_b), (b'x/a', blob_a), (b'x/b', blob_b)]}) self.assertWalkYields([c2], [c3.id], paths=[b'b']) self.assertWalkYields([c3, c1], [c3.id], paths=[b'x']) def test_paths_max_entries(self): blob_a = make_object(Blob, data=b'a') blob_b = make_object(Blob, data=b'b') c1, c2 = self.make_linear_commits( 2, trees={1: [(b'a', blob_a)], 2: [(b'a', blob_a), (b'b', blob_b)]}) self.assertWalkYields([c2], [c2.id], paths=[b'b'], max_entries=1) self.assertWalkYields([c1], [c1.id], paths=[b'a'], max_entries=1) def test_paths_merge(self): blob_a1 = make_object(Blob, data=b'a1') blob_a2 = make_object(Blob, data=b'a2') blob_a3 = make_object(Blob, data=b'a3') x1, y2, m3, m4 = self.make_commits( [[1], [2], [3, 1, 2], [4, 1, 2]], trees={1: [(b'a', blob_a1)], 2: [(b'a', blob_a2)], 3: [(b'a', blob_a3)], 4: [(b'a', blob_a1)]}) # Non-conflicting self.assertWalkYields([m3, y2, x1], [m3.id], paths=[b'a']) self.assertWalkYields([y2, x1], [m4.id], paths=[b'a']) def test_changes_with_renames(self): blob = make_object(Blob, data=b'blob') c1, c2 = self.make_linear_commits( 2, trees={1: [(b'a', blob)], 2: [(b'b', blob)]}) entry_a = (b'a', F, blob.id) entry_b = (b'b', F, blob.id) changes_without_renames = [TreeChange.delete(entry_a), TreeChange.add(entry_b)] changes_with_renames = [TreeChange(CHANGE_RENAME, entry_a, entry_b)] self.assertWalkYields( [TestWalkEntry(c2, changes_without_renames)], [c2.id], max_entries=1) detector = RenameDetector(self.store) self.assertWalkYields( [TestWalkEntry(c2, changes_with_renames)], [c2.id], max_entries=1, rename_detector=detector) def test_follow_rename(self): blob = make_object(Blob, data=b'blob') names = [b'a', b'a', b'b', b'b', b'c', b'c'] trees = dict((i + 1, [(n, blob, F)]) for i, n in enumerate(names)) c1, c2, c3, c4, c5, c6 = self.make_linear_commits(6, trees=trees) self.assertWalkYields([c5], [c6.id], paths=[b'c']) e = lambda n: (n, F, blob.id) self.assertWalkYields( [TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'c'))]), TestWalkEntry(c3, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'b'))]), TestWalkEntry(c1, [TreeChange.add(e(b'a'))])], [c6.id], paths=[b'c'], follow=True) def test_follow_rename_remove_path(self): blob = make_object(Blob, data=b'blob') _, _, _, c4, c5, c6 = self.make_linear_commits( 6, trees={1: [(b'a', blob), (b'c', blob)], 2: [], 3: [], 4: [(b'b', blob)], 5: [(b'a', blob)], 6: [(b'c', blob)]}) e = lambda n: (n, F, blob.id) # Once the path changes to b, we aren't interested in a or c anymore. self.assertWalkYields( [TestWalkEntry(c6, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'c'))]), TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'a'))]), TestWalkEntry(c4, [TreeChange.add(e(b'b'))])], [c6.id], paths=[b'c'], follow=True) def test_since(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([c3, c2, c1], [c3.id], since=-1) self.assertWalkYields([c3, c2, c1], [c3.id], since=0) self.assertWalkYields([c3, c2], [c3.id], since=1) self.assertWalkYields([c3, c2], [c3.id], since=99) self.assertWalkYields([c3, c2], [c3.id], since=100) self.assertWalkYields([c3], [c3.id], since=101) self.assertWalkYields([c3], [c3.id], since=199) self.assertWalkYields([c3], [c3.id], since=200) self.assertWalkYields([], [c3.id], since=201) self.assertWalkYields([], [c3.id], since=300) def test_until(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([], [c3.id], until=-1) self.assertWalkYields([c1], [c3.id], until=0) self.assertWalkYields([c1], [c3.id], until=1) self.assertWalkYields([c1], [c3.id], until=99) self.assertWalkYields([c2, c1], [c3.id], until=100) self.assertWalkYields([c2, c1], [c3.id], until=101) self.assertWalkYields([c2, c1], [c3.id], until=199) self.assertWalkYields([c3, c2, c1], [c3.id], until=200) self.assertWalkYields([c3, c2, c1], [c3.id], until=201) self.assertWalkYields([c3, c2, c1], [c3.id], until=300) def test_since_until(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([], [c3.id], since=100, until=99) self.assertWalkYields([c3, c2, c1], [c3.id], since=-1, until=201) self.assertWalkYields([c2], [c3.id], since=100, until=100) self.assertWalkYields([c2], [c3.id], since=50, until=150) def test_since_over_scan(self): commits = self.make_linear_commits( 11, times=[9, 0, 1, 2, 3, 4, 5, 8, 6, 7, 9]) c8, _, c10, c11 = commits[-4:] del self.store[commits[0].id] # c9 is older than we want to walk, but is out of order with its parent, # so we need to walk past it to get to c8. # c1 would also match, but we've deleted it, and it should get pruned # even with over-scanning. self.assertWalkYields([c11, c10, c8], [c11.id], since=7) def assertTopoOrderEqual(self, expected_commits, commits): entries = [TestWalkEntry(c, None) for c in commits] actual_ids = [e.commit.id for e in list(_topo_reorder(entries))] self.assertEqual([c.id for c in expected_commits], actual_ids) def test_topo_reorder_linear(self): commits = self.make_linear_commits(5) commits.reverse() for perm in permutations(commits): self.assertTopoOrderEqual(commits, perm) def test_topo_reorder_multiple_parents(self): c1, c2, c3 = self.make_commits([[1], [2], [3, 1, 2]]) # Already sorted, so totally FIFO. self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1]) self.assertTopoOrderEqual([c3, c1, c2], [c3, c1, c2]) # c3 causes one parent to be yielded. self.assertTopoOrderEqual([c3, c2, c1], [c2, c3, c1]) self.assertTopoOrderEqual([c3, c1, c2], [c1, c3, c2]) # c3 causes both parents to be yielded. self.assertTopoOrderEqual([c3, c2, c1], [c1, c2, c3]) self.assertTopoOrderEqual([c3, c2, c1], [c2, c1, c3]) def test_topo_reorder_multiple_children(self): c1, c2, c3 = self.make_commits([[1], [2, 1], [3, 1]]) # c2 and c3 are FIFO but c1 moves to the end. self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1]) self.assertTopoOrderEqual([c3, c2, c1], [c3, c1, c2]) self.assertTopoOrderEqual([c3, c2, c1], [c1, c3, c2]) self.assertTopoOrderEqual([c2, c3, c1], [c2, c3, c1]) self.assertTopoOrderEqual([c2, c3, c1], [c2, c1, c3]) self.assertTopoOrderEqual([c2, c3, c1], [c1, c2, c3]) def test_out_of_order_children(self): c1, c2, c3, c4, c5 = self.make_commits( [[1], [2, 1], [3, 2], [4, 1], [5, 3, 4]], times=[2, 1, 3, 4, 5]) self.assertWalkYields([c5, c4, c3, c1, c2], [c5.id]) self.assertWalkYields([c5, c4, c3, c2, c1], [c5.id], order=ORDER_TOPO) def test_out_of_order_with_exclude(self): # Create the following graph: # c1-------x2---m6 # \ / # \-y3--y4-/--y5 # Due to skew, y5 is the oldest commit. c1, x2, y3, y4, y5, m6 = self.make_commits( [[1], [2, 1], [3, 1], [4, 3], [5, 4], [6, 2, 4]], times=[2, 3, 4, 5, 1, 6]) self.assertWalkYields([m6, y4, y3, x2, c1], [m6.id]) # Ensure that c1..y4 get excluded even though they're popped from the # priority queue long before y5. self.assertWalkYields([m6, x2], [m6.id], exclude=[y5.id]) def test_empty_walk(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([], [c3.id], exclude=[c3.id]) + + +class WalkEntryTest(TestCase): + + def setUp(self): + super(WalkEntryTest, self).setUp() + self.store = MemoryObjectStore() + + def make_commits(self, commit_spec, **kwargs): + times = kwargs.pop('times', []) + attrs = kwargs.pop('attrs', {}) + for i, t in enumerate(times): + attrs.setdefault(i + 1, {})['commit_time'] = t + return build_commit_graph(self.store, commit_spec, attrs=attrs, + **kwargs) + + def make_linear_commits(self, num_commits, **kwargs): + commit_spec = [] + for i in range(1, num_commits + 1): + c = [i] + if i > 1: + c.append(i - 1) + commit_spec.append(c) + return self.make_commits(commit_spec, **kwargs) + + def test_all_changes(self): + # Construct a commit with 2 files in different subdirectories. + blob_a = make_object(Blob, data=b'a') + blob_b = make_object(Blob, data=b'b') + c1 = self.make_linear_commits( + 1, + trees={1: [(b'x/a', blob_a), (b'y/b', blob_b)]}, + )[0] + + # Get the WalkEntry for the commit. + walker = Walker(self.store, c1.id) + walker_entry = list(walker)[0] + changes = walker_entry.changes() + + # Compare the changes with the expected values. + entry_a = (b'x/a', F, blob_a.id) + entry_b = (b'y/b', F, blob_b.id) + self.assertEqual( + [TreeChange.add(entry_a), + TreeChange.add(entry_b)], + changes, + ) + + def test_all_with_merge(self): + blob_a = make_object(Blob, data=b'a') + blob_a2 = make_object(Blob, data=b'a2') + blob_b = make_object(Blob, data=b'b') + blob_b2 = make_object(Blob, data=b'b2') + x1, y2, m3 = self.make_commits( + [[1], [2], [3, 1, 2]], + trees={1: [(b'x/a', blob_a)], + 2: [(b'y/b', blob_b)], + 3: [(b'x/a', blob_a2), (b'y/b', blob_b2)]}) + + # Get the WalkEntry for the merge commit. + walker = Walker(self.store, m3.id) + entries = list(walker) + walker_entry = entries[0] + self.assertEqual(walker_entry.commit.id, m3.id) + changes = walker_entry.changes() + self.assertEqual(2, len(changes)) + + entry_a = (b'x/a', F, blob_a.id) + entry_a2 = (b'x/a', F, blob_a2.id) + entry_b = (b'y/b', F, blob_b.id) + entry_b2 = (b'y/b', F, blob_b2.id) + self.assertEqual( + [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2), + TreeChange.add(entry_a2)], + [TreeChange.add(entry_b2), + TreeChange(CHANGE_MODIFY, entry_b, entry_b2)]], + changes, + ) + + def test_filter_changes(self): + # Construct a commit with 2 files in different subdirectories. + blob_a = make_object(Blob, data=b'a') + blob_b = make_object(Blob, data=b'b') + c1 = self.make_linear_commits( + 1, + trees={1: [(b'x/a', blob_a), (b'y/b', blob_b)]}, + )[0] + + # Get the WalkEntry for the commit. + walker = Walker(self.store, c1.id) + walker_entry = list(walker)[0] + changes = walker_entry.changes(path_prefix=b'x') + + # Compare the changes with the expected values. + entry_a = (b'a', F, blob_a.id) + self.assertEqual( + [TreeChange.add(entry_a)], + changes, + ) + + def test_filter_with_merge(self): + blob_a = make_object(Blob, data=b'a') + blob_a2 = make_object(Blob, data=b'a2') + blob_b = make_object(Blob, data=b'b') + blob_b2 = make_object(Blob, data=b'b2') + x1, y2, m3 = self.make_commits( + [[1], [2], [3, 1, 2]], + trees={1: [(b'x/a', blob_a)], + 2: [(b'y/b', blob_b)], + 3: [(b'x/a', blob_a2), (b'y/b', blob_b2)]}) + + # Get the WalkEntry for the merge commit. + walker = Walker(self.store, m3.id) + entries = list(walker) + walker_entry = entries[0] + self.assertEqual(walker_entry.commit.id, m3.id) + changes = walker_entry.changes(b'x') + self.assertEqual(1, len(changes)) + + entry_a = (b'a', F, blob_a.id) + entry_a2 = (b'a', F, blob_a2.id) + self.assertEqual( + [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2)]], + changes, + ) diff --git a/dulwich/walk.py b/dulwich/walk.py index 66cc7d1a..f5696543 100644 --- a/dulwich/walk.py +++ b/dulwich/walk.py @@ -1,374 +1,405 @@ # walk.py -- General implementation of walking commits and their contents. # Copyright (C) 2010 Google, Inc. # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. # """General implementation of walking commits and their contents.""" from collections import defaultdict import collections import heapq from itertools import chain from dulwich.diff_tree import ( RENAME_CHANGE_TYPES, tree_changes, tree_changes_for_merge, RenameDetector, ) from dulwich.errors import ( MissingCommitError, ) ORDER_DATE = 'date' ORDER_TOPO = 'topo' ALL_ORDERS = (ORDER_DATE, ORDER_TOPO) # Maximum number of commits to walk past a commit time boundary. _MAX_EXTRA_COMMITS = 5 class WalkEntry(object): """Object encapsulating a single result from a walk.""" def __init__(self, walker, commit): self.commit = commit self._store = walker.store self._get_parents = walker.get_parents - self._changes = None + self._changes = {} self._rename_detector = walker.rename_detector - def changes(self): + def changes(self, path_prefix=None): """Get the tree changes for this entry. + :param path_prefix: Portion of the path in the repository to + use to filter changes. Must be a directory name. Must be + a full, valid, path reference (no partial names or wildcards). :return: For commits with up to one parent, a list of TreeChange objects; if the commit has no parents, these will be relative to the empty tree. For merge commits, a list of lists of TreeChange objects; see dulwich.diff.tree_changes_for_merge. """ - if self._changes is None: + cached = self._changes.get(path_prefix) + if cached is None: commit = self.commit if not self._get_parents(commit): changes_func = tree_changes parent = None elif len(self._get_parents(commit)) == 1: changes_func = tree_changes parent = self._store[self._get_parents(commit)[0]].tree + if path_prefix: + mode, subtree_sha = parent.lookup_path( + self._store.__getitem__, + path_prefix, + ) + parent = self._store[subtree_sha] else: changes_func = tree_changes_for_merge parent = [self._store[p].tree for p in self._get_parents(commit)] - self._changes = list(changes_func( - self._store, parent, commit.tree, + if path_prefix: + parent_trees = [self._store[p] for p in parent] + parent = [] + for p in parent_trees: + try: + mode, st = p.lookup_path( + self._store.__getitem__, + path_prefix, + ) + except KeyError: + pass + else: + parent.append(st) + commit_tree_sha = commit.tree + if path_prefix: + commit_tree = self._store[commit_tree_sha] + mode, commit_tree_sha = commit_tree.lookup_path( + self._store.__getitem__, + path_prefix, + ) + cached = list(changes_func( + self._store, parent, commit_tree_sha, rename_detector=self._rename_detector)) - return self._changes + self._changes[path_prefix] = cached + return self._changes[path_prefix] def __repr__(self): return '' % ( self.commit.id, self.changes()) class _CommitTimeQueue(object): """Priority queue of WalkEntry objects by commit time.""" def __init__(self, walker): self._walker = walker self._store = walker.store self._get_parents = walker.get_parents self._excluded = walker.excluded self._pq = [] self._pq_set = set() self._seen = set() self._done = set() self._min_time = walker.since self._last = None self._extra_commits_left = _MAX_EXTRA_COMMITS self._is_finished = False for commit_id in chain(walker.include, walker.excluded): self._push(commit_id) def _push(self, commit_id): try: commit = self._store[commit_id] except KeyError: raise MissingCommitError(commit_id) if commit_id not in self._pq_set and commit_id not in self._done: heapq.heappush(self._pq, (-commit.commit_time, commit)) self._pq_set.add(commit_id) self._seen.add(commit_id) def _exclude_parents(self, commit): excluded = self._excluded seen = self._seen todo = [commit] while todo: commit = todo.pop() for parent in self._get_parents(commit): if parent not in excluded and parent in seen: # TODO: This is inefficient unless the object store does # some caching (which DiskObjectStore currently does not). # We could either add caching in this class or pass around # parsed queue entry objects instead of commits. todo.append(self._store[parent]) excluded.add(parent) def next(self): if self._is_finished: return None while self._pq: _, commit = heapq.heappop(self._pq) sha = commit.id self._pq_set.remove(sha) if sha in self._done: continue self._done.add(sha) for parent_id in self._get_parents(commit): self._push(parent_id) reset_extra_commits = True is_excluded = sha in self._excluded if is_excluded: self._exclude_parents(commit) if self._pq and all(c.id in self._excluded for _, c in self._pq): _, n = self._pq[0] if self._last and n.commit_time >= self._last.commit_time: # If the next commit is newer than the last one, we need # to keep walking in case its parents (which we may not # have seen yet) are excluded. This gives the excluded # set a chance to "catch up" while the commit is still # in the Walker's output queue. reset_extra_commits = True else: reset_extra_commits = False if (self._min_time is not None and commit.commit_time < self._min_time): # We want to stop walking at min_time, but commits at the # boundary may be out of order with respect to their parents. So # we walk _MAX_EXTRA_COMMITS more commits once we hit this # boundary. reset_extra_commits = False if reset_extra_commits: # We're not at a boundary, so reset the counter. self._extra_commits_left = _MAX_EXTRA_COMMITS else: self._extra_commits_left -= 1 if not self._extra_commits_left: break if not is_excluded: self._last = commit return WalkEntry(self._walker, commit) self._is_finished = True return None __next__ = next class Walker(object): """Object for performing a walk of commits in a store. Walker objects are initialized with a store and other options and can then be treated as iterators of Commit objects. """ def __init__(self, store, include, exclude=None, order=ORDER_DATE, reverse=False, max_entries=None, paths=None, rename_detector=None, follow=False, since=None, until=None, get_parents=lambda commit: commit.parents, queue_cls=_CommitTimeQueue): """Constructor. :param store: ObjectStore instance for looking up objects. :param include: Iterable of SHAs of commits to include along with their ancestors. :param exclude: Iterable of SHAs of commits to exclude along with their ancestors, overriding includes. :param order: ORDER_* constant specifying the order of results. Anything other than ORDER_DATE may result in O(n) memory usage. :param reverse: If True, reverse the order of output, requiring O(n) memory. :param max_entries: The maximum number of entries to yield, or None for no limit. :param paths: Iterable of file or subtree paths to show entries for. :param rename_detector: diff.RenameDetector object for detecting renames. :param follow: If True, follow path across renames/copies. Forces a default rename_detector. :param since: Timestamp to list commits after. :param until: Timestamp to list commits before. :param get_parents: Method to retrieve the parents of a commit :param queue_cls: A class to use for a queue of commits, supporting the iterator protocol. The constructor takes a single argument, the Walker. """ # Note: when adding arguments to this method, please also update # dulwich.repo.BaseRepo.get_walker if order not in ALL_ORDERS: raise ValueError('Unknown walk order %s' % order) self.store = store if not isinstance(include, list): include = [include] self.include = include self.excluded = set(exclude or []) self.order = order self.reverse = reverse self.max_entries = max_entries self.paths = paths and set(paths) or None if follow and not rename_detector: rename_detector = RenameDetector(store) self.rename_detector = rename_detector self.get_parents = get_parents self.follow = follow self.since = since self.until = until self._num_entries = 0 self._queue = queue_cls(self) self._out_queue = collections.deque() def _path_matches(self, changed_path): if changed_path is None: return False for followed_path in self.paths: if changed_path == followed_path: return True if (changed_path.startswith(followed_path) and changed_path[len(followed_path)] == b'/'[0]): return True return False def _change_matches(self, change): if not change: return False old_path = change.old.path new_path = change.new.path if self._path_matches(new_path): if self.follow and change.type in RENAME_CHANGE_TYPES: self.paths.add(old_path) self.paths.remove(new_path) return True elif self._path_matches(old_path): return True return False def _should_return(self, entry): """Determine if a walk entry should be returned.. :param entry: The WalkEntry to consider. :return: True if the WalkEntry should be returned by this walk, or False otherwise (e.g. if it doesn't match any requested paths). """ commit = entry.commit if self.since is not None and commit.commit_time < self.since: return False if self.until is not None and commit.commit_time > self.until: return False if commit.id in self.excluded: return False if self.paths is None: return True if len(self.get_parents(commit)) > 1: for path_changes in entry.changes(): # For merge commits, only include changes with conflicts for # this path. Since a rename conflict may include different # old.paths, we have to check all of them. for change in path_changes: if self._change_matches(change): return True else: for change in entry.changes(): if self._change_matches(change): return True return None def _next(self): max_entries = self.max_entries while max_entries is None or self._num_entries < max_entries: entry = next(self._queue) if entry is not None: self._out_queue.append(entry) if entry is None or len(self._out_queue) > _MAX_EXTRA_COMMITS: if not self._out_queue: return None entry = self._out_queue.popleft() if self._should_return(entry): self._num_entries += 1 return entry return None def _reorder(self, results): """Possibly reorder a results iterator. :param results: An iterator of WalkEntry objects, in the order returned from the queue_cls. :return: An iterator or list of WalkEntry objects, in the order required by the Walker. """ if self.order == ORDER_TOPO: results = _topo_reorder(results, self.get_parents) if self.reverse: results = reversed(list(results)) return results def __iter__(self): return iter(self._reorder(iter(self._next, None))) def _topo_reorder(entries, get_parents=lambda commit: commit.parents): """Reorder an iterable of entries topologically. This works best assuming the entries are already in almost-topological order, e.g. in commit time order. :param entries: An iterable of WalkEntry objects. :param get_parents: Optional function for getting the parents of a commit. :return: iterator over WalkEntry objects from entries in FIFO order, except where a parent would be yielded before any of its children. """ todo = collections.deque() pending = {} num_children = defaultdict(int) for entry in entries: todo.append(entry) for p in get_parents(entry.commit): num_children[p] += 1 while todo: entry = todo.popleft() commit = entry.commit commit_id = commit.id if num_children[commit_id]: pending[commit_id] = entry continue for parent_id in get_parents(commit): num_children[parent_id] -= 1 if not num_children[parent_id]: parent_entry = pending.pop(parent_id, None) if parent_entry: todo.appendleft(parent_entry) yield entry