diff --git a/dulwich/tests/test_walk.py b/dulwich/tests/test_walk.py
index c441e5ee..5627ee90 100644
--- a/dulwich/tests/test_walk.py
+++ b/dulwich/tests/test_walk.py
@@ -1,417 +1,543 @@
# test_walk.py -- Tests for commit walking functionality.
# Copyright (C) 2010 Google, Inc.
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# for a copy of the GNU General Public License
# and for a copy of the Apache
# License, Version 2.0.
#
"""Tests for commit walking functionality."""
from itertools import (
permutations,
)
from dulwich.diff_tree import (
+ CHANGE_ADD,
CHANGE_MODIFY,
CHANGE_RENAME,
TreeChange,
RenameDetector,
)
from dulwich.errors import (
MissingCommitError,
)
from dulwich.object_store import (
MemoryObjectStore,
)
from dulwich.objects import (
Commit,
Blob,
)
from dulwich.walk import (
ORDER_TOPO,
WalkEntry,
Walker,
_topo_reorder
)
from dulwich.tests import TestCase
from dulwich.tests.utils import (
F,
make_object,
build_commit_graph,
)
class TestWalkEntry(object):
def __init__(self, commit, changes):
self.commit = commit
self.changes = changes
def __repr__(self):
return '' % (
self.commit.id, self.changes)
def __eq__(self, other):
if not isinstance(other, WalkEntry) or self.commit != other.commit:
return False
if self.changes is None:
return True
return self.changes == other.changes()
class WalkerTest(TestCase):
def setUp(self):
super(WalkerTest, self).setUp()
self.store = MemoryObjectStore()
def make_commits(self, commit_spec, **kwargs):
times = kwargs.pop('times', [])
attrs = kwargs.pop('attrs', {})
for i, t in enumerate(times):
attrs.setdefault(i + 1, {})['commit_time'] = t
return build_commit_graph(self.store, commit_spec, attrs=attrs,
**kwargs)
def make_linear_commits(self, num_commits, **kwargs):
commit_spec = []
for i in range(1, num_commits + 1):
c = [i]
if i > 1:
c.append(i - 1)
commit_spec.append(c)
return self.make_commits(commit_spec, **kwargs)
def assertWalkYields(self, expected, *args, **kwargs):
walker = Walker(self.store, *args, **kwargs)
expected = list(expected)
for i, entry in enumerate(expected):
if isinstance(entry, Commit):
expected[i] = TestWalkEntry(entry, None)
actual = list(walker)
self.assertEqual(expected, actual)
def test_linear(self):
c1, c2, c3 = self.make_linear_commits(3)
self.assertWalkYields([c1], [c1.id])
self.assertWalkYields([c2, c1], [c2.id])
self.assertWalkYields([c3, c2, c1], [c3.id])
self.assertWalkYields([c3, c2, c1], [c3.id, c1.id])
self.assertWalkYields([c3, c2], [c3.id], exclude=[c1.id])
self.assertWalkYields([c3, c2], [c3.id, c1.id], exclude=[c1.id])
self.assertWalkYields([c3], [c3.id, c1.id], exclude=[c2.id])
def test_missing(self):
cs = list(reversed(self.make_linear_commits(20)))
self.assertWalkYields(cs, [cs[0].id])
# Exactly how close we can get to a missing commit depends on our
# implementation (in particular the choice of _MAX_EXTRA_COMMITS), but
# we should at least be able to walk some history in a broken repo.
del self.store[cs[-1].id]
for i in range(1, 11):
self.assertWalkYields(cs[:i], [cs[0].id], max_entries=i)
self.assertRaises(MissingCommitError, Walker, self.store, [cs[-1].id])
def test_branch(self):
c1, x2, x3, y4 = self.make_commits([[1], [2, 1], [3, 2], [4, 1]])
self.assertWalkYields([x3, x2, c1], [x3.id])
self.assertWalkYields([y4, c1], [y4.id])
self.assertWalkYields([y4, x2, c1], [y4.id, x2.id])
self.assertWalkYields([y4, x2], [y4.id, x2.id], exclude=[c1.id])
self.assertWalkYields([y4, x3], [y4.id, x3.id], exclude=[x2.id])
self.assertWalkYields([y4], [y4.id], exclude=[x3.id])
self.assertWalkYields([x3, x2], [x3.id], exclude=[y4.id])
def test_merge(self):
c1, c2, c3, c4 = self.make_commits([[1], [2, 1], [3, 1], [4, 2, 3]])
self.assertWalkYields([c4, c3, c2, c1], [c4.id])
self.assertWalkYields([c3, c1], [c3.id])
self.assertWalkYields([c2, c1], [c2.id])
self.assertWalkYields([c4, c3], [c4.id], exclude=[c2.id])
self.assertWalkYields([c4, c2], [c4.id], exclude=[c3.id])
def test_reverse(self):
c1, c2, c3 = self.make_linear_commits(3)
self.assertWalkYields([c1, c2, c3], [c3.id], reverse=True)
def test_max_entries(self):
c1, c2, c3 = self.make_linear_commits(3)
self.assertWalkYields([c3, c2, c1], [c3.id], max_entries=3)
self.assertWalkYields([c3, c2], [c3.id], max_entries=2)
self.assertWalkYields([c3], [c3.id], max_entries=1)
def test_reverse_after_max_entries(self):
c1, c2, c3 = self.make_linear_commits(3)
self.assertWalkYields([c1, c2, c3], [c3.id], max_entries=3,
reverse=True)
self.assertWalkYields([c2, c3], [c3.id], max_entries=2, reverse=True)
self.assertWalkYields([c3], [c3.id], max_entries=1, reverse=True)
def test_changes_one_parent(self):
blob_a1 = make_object(Blob, data=b'a1')
blob_a2 = make_object(Blob, data=b'a2')
blob_b2 = make_object(Blob, data=b'b2')
c1, c2 = self.make_linear_commits(
2, trees={1: [(b'a', blob_a1)],
2: [(b'a', blob_a2), (b'b', blob_b2)]})
e1 = TestWalkEntry(c1, [TreeChange.add((b'a', F, blob_a1.id))])
e2 = TestWalkEntry(c2, [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
(b'a', F, blob_a2.id)),
TreeChange.add((b'b', F, blob_b2.id))])
self.assertWalkYields([e2, e1], [c2.id])
def test_changes_multiple_parents(self):
blob_a1 = make_object(Blob, data=b'a1')
blob_b2 = make_object(Blob, data=b'b2')
blob_a3 = make_object(Blob, data=b'a3')
c1, c2, c3 = self.make_commits(
[[1], [2], [3, 1, 2]],
trees={1: [(b'a', blob_a1)], 2: [(b'b', blob_b2)],
3: [(b'a', blob_a3), (b'b', blob_b2)]})
# a is a modify/add conflict and b is not conflicted.
changes = [[
TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a3.id)),
TreeChange.add((b'a', F, blob_a3.id)),
]]
self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
exclude=[c1.id, c2.id])
def test_path_matches(self):
walker = Walker(None, [], paths=[b'foo', b'bar', b'baz/quux'])
self.assertTrue(walker._path_matches(b'foo'))
self.assertTrue(walker._path_matches(b'foo/a'))
self.assertTrue(walker._path_matches(b'foo/a/b'))
self.assertTrue(walker._path_matches(b'bar'))
self.assertTrue(walker._path_matches(b'baz/quux'))
self.assertTrue(walker._path_matches(b'baz/quux/a'))
self.assertFalse(walker._path_matches(None))
self.assertFalse(walker._path_matches(b'oops'))
self.assertFalse(walker._path_matches(b'fool'))
self.assertFalse(walker._path_matches(b'baz'))
self.assertFalse(walker._path_matches(b'baz/quu'))
def test_paths(self):
blob_a1 = make_object(Blob, data=b'a1')
blob_b2 = make_object(Blob, data=b'b2')
blob_a3 = make_object(Blob, data=b'a3')
blob_b3 = make_object(Blob, data=b'b3')
c1, c2, c3 = self.make_linear_commits(
3, trees={1: [(b'a', blob_a1)],
2: [(b'a', blob_a1), (b'x/b', blob_b2)],
3: [(b'a', blob_a3), (b'x/b', blob_b3)]})
self.assertWalkYields([c3, c2, c1], [c3.id])
self.assertWalkYields([c3, c1], [c3.id], paths=[b'a'])
self.assertWalkYields([c3, c2], [c3.id], paths=[b'x/b'])
# All changes are included, not just for requested paths.
changes = [
TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
(b'a', F, blob_a3.id)),
TreeChange(CHANGE_MODIFY, (b'x/b', F, blob_b2.id),
(b'x/b', F, blob_b3.id)),
]
self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
max_entries=1, paths=[b'a'])
def test_paths_subtree(self):
blob_a = make_object(Blob, data=b'a')
blob_b = make_object(Blob, data=b'b')
c1, c2, c3 = self.make_linear_commits(
3, trees={1: [(b'x/a', blob_a)],
2: [(b'b', blob_b), (b'x/a', blob_a)],
3: [(b'b', blob_b), (b'x/a', blob_a), (b'x/b', blob_b)]})
self.assertWalkYields([c2], [c3.id], paths=[b'b'])
self.assertWalkYields([c3, c1], [c3.id], paths=[b'x'])
def test_paths_max_entries(self):
blob_a = make_object(Blob, data=b'a')
blob_b = make_object(Blob, data=b'b')
c1, c2 = self.make_linear_commits(
2, trees={1: [(b'a', blob_a)],
2: [(b'a', blob_a), (b'b', blob_b)]})
self.assertWalkYields([c2], [c2.id], paths=[b'b'], max_entries=1)
self.assertWalkYields([c1], [c1.id], paths=[b'a'], max_entries=1)
def test_paths_merge(self):
blob_a1 = make_object(Blob, data=b'a1')
blob_a2 = make_object(Blob, data=b'a2')
blob_a3 = make_object(Blob, data=b'a3')
x1, y2, m3, m4 = self.make_commits(
[[1], [2], [3, 1, 2], [4, 1, 2]],
trees={1: [(b'a', blob_a1)],
2: [(b'a', blob_a2)],
3: [(b'a', blob_a3)],
4: [(b'a', blob_a1)]}) # Non-conflicting
self.assertWalkYields([m3, y2, x1], [m3.id], paths=[b'a'])
self.assertWalkYields([y2, x1], [m4.id], paths=[b'a'])
def test_changes_with_renames(self):
blob = make_object(Blob, data=b'blob')
c1, c2 = self.make_linear_commits(
2, trees={1: [(b'a', blob)], 2: [(b'b', blob)]})
entry_a = (b'a', F, blob.id)
entry_b = (b'b', F, blob.id)
changes_without_renames = [TreeChange.delete(entry_a),
TreeChange.add(entry_b)]
changes_with_renames = [TreeChange(CHANGE_RENAME, entry_a, entry_b)]
self.assertWalkYields(
[TestWalkEntry(c2, changes_without_renames)], [c2.id], max_entries=1)
detector = RenameDetector(self.store)
self.assertWalkYields(
[TestWalkEntry(c2, changes_with_renames)], [c2.id], max_entries=1,
rename_detector=detector)
def test_follow_rename(self):
blob = make_object(Blob, data=b'blob')
names = [b'a', b'a', b'b', b'b', b'c', b'c']
trees = dict((i + 1, [(n, blob, F)]) for i, n in enumerate(names))
c1, c2, c3, c4, c5, c6 = self.make_linear_commits(6, trees=trees)
self.assertWalkYields([c5], [c6.id], paths=[b'c'])
e = lambda n: (n, F, blob.id)
self.assertWalkYields(
[TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'c'))]),
TestWalkEntry(c3, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'b'))]),
TestWalkEntry(c1, [TreeChange.add(e(b'a'))])],
[c6.id], paths=[b'c'], follow=True)
def test_follow_rename_remove_path(self):
blob = make_object(Blob, data=b'blob')
_, _, _, c4, c5, c6 = self.make_linear_commits(
6, trees={1: [(b'a', blob), (b'c', blob)],
2: [],
3: [],
4: [(b'b', blob)],
5: [(b'a', blob)],
6: [(b'c', blob)]})
e = lambda n: (n, F, blob.id)
# Once the path changes to b, we aren't interested in a or c anymore.
self.assertWalkYields(
[TestWalkEntry(c6, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'c'))]),
TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'a'))]),
TestWalkEntry(c4, [TreeChange.add(e(b'b'))])],
[c6.id], paths=[b'c'], follow=True)
def test_since(self):
c1, c2, c3 = self.make_linear_commits(3)
self.assertWalkYields([c3, c2, c1], [c3.id], since=-1)
self.assertWalkYields([c3, c2, c1], [c3.id], since=0)
self.assertWalkYields([c3, c2], [c3.id], since=1)
self.assertWalkYields([c3, c2], [c3.id], since=99)
self.assertWalkYields([c3, c2], [c3.id], since=100)
self.assertWalkYields([c3], [c3.id], since=101)
self.assertWalkYields([c3], [c3.id], since=199)
self.assertWalkYields([c3], [c3.id], since=200)
self.assertWalkYields([], [c3.id], since=201)
self.assertWalkYields([], [c3.id], since=300)
def test_until(self):
c1, c2, c3 = self.make_linear_commits(3)
self.assertWalkYields([], [c3.id], until=-1)
self.assertWalkYields([c1], [c3.id], until=0)
self.assertWalkYields([c1], [c3.id], until=1)
self.assertWalkYields([c1], [c3.id], until=99)
self.assertWalkYields([c2, c1], [c3.id], until=100)
self.assertWalkYields([c2, c1], [c3.id], until=101)
self.assertWalkYields([c2, c1], [c3.id], until=199)
self.assertWalkYields([c3, c2, c1], [c3.id], until=200)
self.assertWalkYields([c3, c2, c1], [c3.id], until=201)
self.assertWalkYields([c3, c2, c1], [c3.id], until=300)
def test_since_until(self):
c1, c2, c3 = self.make_linear_commits(3)
self.assertWalkYields([], [c3.id], since=100, until=99)
self.assertWalkYields([c3, c2, c1], [c3.id], since=-1, until=201)
self.assertWalkYields([c2], [c3.id], since=100, until=100)
self.assertWalkYields([c2], [c3.id], since=50, until=150)
def test_since_over_scan(self):
commits = self.make_linear_commits(
11, times=[9, 0, 1, 2, 3, 4, 5, 8, 6, 7, 9])
c8, _, c10, c11 = commits[-4:]
del self.store[commits[0].id]
# c9 is older than we want to walk, but is out of order with its parent,
# so we need to walk past it to get to c8.
# c1 would also match, but we've deleted it, and it should get pruned
# even with over-scanning.
self.assertWalkYields([c11, c10, c8], [c11.id], since=7)
def assertTopoOrderEqual(self, expected_commits, commits):
entries = [TestWalkEntry(c, None) for c in commits]
actual_ids = [e.commit.id for e in list(_topo_reorder(entries))]
self.assertEqual([c.id for c in expected_commits], actual_ids)
def test_topo_reorder_linear(self):
commits = self.make_linear_commits(5)
commits.reverse()
for perm in permutations(commits):
self.assertTopoOrderEqual(commits, perm)
def test_topo_reorder_multiple_parents(self):
c1, c2, c3 = self.make_commits([[1], [2], [3, 1, 2]])
# Already sorted, so totally FIFO.
self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1])
self.assertTopoOrderEqual([c3, c1, c2], [c3, c1, c2])
# c3 causes one parent to be yielded.
self.assertTopoOrderEqual([c3, c2, c1], [c2, c3, c1])
self.assertTopoOrderEqual([c3, c1, c2], [c1, c3, c2])
# c3 causes both parents to be yielded.
self.assertTopoOrderEqual([c3, c2, c1], [c1, c2, c3])
self.assertTopoOrderEqual([c3, c2, c1], [c2, c1, c3])
def test_topo_reorder_multiple_children(self):
c1, c2, c3 = self.make_commits([[1], [2, 1], [3, 1]])
# c2 and c3 are FIFO but c1 moves to the end.
self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1])
self.assertTopoOrderEqual([c3, c2, c1], [c3, c1, c2])
self.assertTopoOrderEqual([c3, c2, c1], [c1, c3, c2])
self.assertTopoOrderEqual([c2, c3, c1], [c2, c3, c1])
self.assertTopoOrderEqual([c2, c3, c1], [c2, c1, c3])
self.assertTopoOrderEqual([c2, c3, c1], [c1, c2, c3])
def test_out_of_order_children(self):
c1, c2, c3, c4, c5 = self.make_commits(
[[1], [2, 1], [3, 2], [4, 1], [5, 3, 4]],
times=[2, 1, 3, 4, 5])
self.assertWalkYields([c5, c4, c3, c1, c2], [c5.id])
self.assertWalkYields([c5, c4, c3, c2, c1], [c5.id], order=ORDER_TOPO)
def test_out_of_order_with_exclude(self):
# Create the following graph:
# c1-------x2---m6
# \ /
# \-y3--y4-/--y5
# Due to skew, y5 is the oldest commit.
c1, x2, y3, y4, y5, m6 = self.make_commits(
[[1], [2, 1], [3, 1], [4, 3], [5, 4], [6, 2, 4]],
times=[2, 3, 4, 5, 1, 6])
self.assertWalkYields([m6, y4, y3, x2, c1], [m6.id])
# Ensure that c1..y4 get excluded even though they're popped from the
# priority queue long before y5.
self.assertWalkYields([m6, x2], [m6.id], exclude=[y5.id])
def test_empty_walk(self):
c1, c2, c3 = self.make_linear_commits(3)
self.assertWalkYields([], [c3.id], exclude=[c3.id])
+
+
+class WalkEntryTest(TestCase):
+
+ def setUp(self):
+ super(WalkEntryTest, self).setUp()
+ self.store = MemoryObjectStore()
+
+ def make_commits(self, commit_spec, **kwargs):
+ times = kwargs.pop('times', [])
+ attrs = kwargs.pop('attrs', {})
+ for i, t in enumerate(times):
+ attrs.setdefault(i + 1, {})['commit_time'] = t
+ return build_commit_graph(self.store, commit_spec, attrs=attrs,
+ **kwargs)
+
+ def make_linear_commits(self, num_commits, **kwargs):
+ commit_spec = []
+ for i in range(1, num_commits + 1):
+ c = [i]
+ if i > 1:
+ c.append(i - 1)
+ commit_spec.append(c)
+ return self.make_commits(commit_spec, **kwargs)
+
+ def test_all_changes(self):
+ # Construct a commit with 2 files in different subdirectories.
+ blob_a = make_object(Blob, data=b'a')
+ blob_b = make_object(Blob, data=b'b')
+ c1 = self.make_linear_commits(
+ 1,
+ trees={1: [(b'x/a', blob_a), (b'y/b', blob_b)]},
+ )[0]
+
+ # Get the WalkEntry for the commit.
+ walker = Walker(self.store, c1.id)
+ walker_entry = list(walker)[0]
+ changes = walker_entry.changes()
+
+ # Compare the changes with the expected values.
+ entry_a = (b'x/a', F, blob_a.id)
+ entry_b = (b'y/b', F, blob_b.id)
+ self.assertEqual(
+ [TreeChange.add(entry_a),
+ TreeChange.add(entry_b)],
+ changes,
+ )
+
+ def test_all_with_merge(self):
+ blob_a = make_object(Blob, data=b'a')
+ blob_a2 = make_object(Blob, data=b'a2')
+ blob_b = make_object(Blob, data=b'b')
+ blob_b2 = make_object(Blob, data=b'b2')
+ x1, y2, m3 = self.make_commits(
+ [[1], [2], [3, 1, 2]],
+ trees={1: [(b'x/a', blob_a)],
+ 2: [(b'y/b', blob_b)],
+ 3: [(b'x/a', blob_a2), (b'y/b', blob_b2)]})
+
+ # Get the WalkEntry for the merge commit.
+ walker = Walker(self.store, m3.id)
+ entries = list(walker)
+ walker_entry = entries[0]
+ self.assertEqual(walker_entry.commit.id, m3.id)
+ changes = walker_entry.changes()
+ self.assertEqual(2, len(changes))
+
+ entry_a = (b'x/a', F, blob_a.id)
+ entry_a2 = (b'x/a', F, blob_a2.id)
+ entry_b = (b'y/b', F, blob_b.id)
+ entry_b2 = (b'y/b', F, blob_b2.id)
+ self.assertEqual(
+ [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2),
+ TreeChange.add(entry_a2)],
+ [TreeChange.add(entry_b2),
+ TreeChange(CHANGE_MODIFY, entry_b, entry_b2)]],
+ changes,
+ )
+
+ def test_filter_changes(self):
+ # Construct a commit with 2 files in different subdirectories.
+ blob_a = make_object(Blob, data=b'a')
+ blob_b = make_object(Blob, data=b'b')
+ c1 = self.make_linear_commits(
+ 1,
+ trees={1: [(b'x/a', blob_a), (b'y/b', blob_b)]},
+ )[0]
+
+ # Get the WalkEntry for the commit.
+ walker = Walker(self.store, c1.id)
+ walker_entry = list(walker)[0]
+ changes = walker_entry.changes(path_prefix=b'x')
+
+ # Compare the changes with the expected values.
+ entry_a = (b'a', F, blob_a.id)
+ self.assertEqual(
+ [TreeChange.add(entry_a)],
+ changes,
+ )
+
+ def test_filter_with_merge(self):
+ blob_a = make_object(Blob, data=b'a')
+ blob_a2 = make_object(Blob, data=b'a2')
+ blob_b = make_object(Blob, data=b'b')
+ blob_b2 = make_object(Blob, data=b'b2')
+ x1, y2, m3 = self.make_commits(
+ [[1], [2], [3, 1, 2]],
+ trees={1: [(b'x/a', blob_a)],
+ 2: [(b'y/b', blob_b)],
+ 3: [(b'x/a', blob_a2), (b'y/b', blob_b2)]})
+
+ # Get the WalkEntry for the merge commit.
+ walker = Walker(self.store, m3.id)
+ entries = list(walker)
+ walker_entry = entries[0]
+ self.assertEqual(walker_entry.commit.id, m3.id)
+ changes = walker_entry.changes(b'x')
+ self.assertEqual(1, len(changes))
+
+ entry_a = (b'a', F, blob_a.id)
+ entry_a2 = (b'a', F, blob_a2.id)
+ self.assertEqual(
+ [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2)]],
+ changes,
+ )
diff --git a/dulwich/walk.py b/dulwich/walk.py
index 66cc7d1a..f5696543 100644
--- a/dulwich/walk.py
+++ b/dulwich/walk.py
@@ -1,374 +1,405 @@
# walk.py -- General implementation of walking commits and their contents.
# Copyright (C) 2010 Google, Inc.
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# for a copy of the GNU General Public License
# and for a copy of the Apache
# License, Version 2.0.
#
"""General implementation of walking commits and their contents."""
from collections import defaultdict
import collections
import heapq
from itertools import chain
from dulwich.diff_tree import (
RENAME_CHANGE_TYPES,
tree_changes,
tree_changes_for_merge,
RenameDetector,
)
from dulwich.errors import (
MissingCommitError,
)
ORDER_DATE = 'date'
ORDER_TOPO = 'topo'
ALL_ORDERS = (ORDER_DATE, ORDER_TOPO)
# Maximum number of commits to walk past a commit time boundary.
_MAX_EXTRA_COMMITS = 5
class WalkEntry(object):
"""Object encapsulating a single result from a walk."""
def __init__(self, walker, commit):
self.commit = commit
self._store = walker.store
self._get_parents = walker.get_parents
- self._changes = None
+ self._changes = {}
self._rename_detector = walker.rename_detector
- def changes(self):
+ def changes(self, path_prefix=None):
"""Get the tree changes for this entry.
+ :param path_prefix: Portion of the path in the repository to
+ use to filter changes. Must be a directory name. Must be
+ a full, valid, path reference (no partial names or wildcards).
:return: For commits with up to one parent, a list of TreeChange
objects; if the commit has no parents, these will be relative to the
empty tree. For merge commits, a list of lists of TreeChange
objects; see dulwich.diff.tree_changes_for_merge.
"""
- if self._changes is None:
+ cached = self._changes.get(path_prefix)
+ if cached is None:
commit = self.commit
if not self._get_parents(commit):
changes_func = tree_changes
parent = None
elif len(self._get_parents(commit)) == 1:
changes_func = tree_changes
parent = self._store[self._get_parents(commit)[0]].tree
+ if path_prefix:
+ mode, subtree_sha = parent.lookup_path(
+ self._store.__getitem__,
+ path_prefix,
+ )
+ parent = self._store[subtree_sha]
else:
changes_func = tree_changes_for_merge
parent = [self._store[p].tree for p in self._get_parents(commit)]
- self._changes = list(changes_func(
- self._store, parent, commit.tree,
+ if path_prefix:
+ parent_trees = [self._store[p] for p in parent]
+ parent = []
+ for p in parent_trees:
+ try:
+ mode, st = p.lookup_path(
+ self._store.__getitem__,
+ path_prefix,
+ )
+ except KeyError:
+ pass
+ else:
+ parent.append(st)
+ commit_tree_sha = commit.tree
+ if path_prefix:
+ commit_tree = self._store[commit_tree_sha]
+ mode, commit_tree_sha = commit_tree.lookup_path(
+ self._store.__getitem__,
+ path_prefix,
+ )
+ cached = list(changes_func(
+ self._store, parent, commit_tree_sha,
rename_detector=self._rename_detector))
- return self._changes
+ self._changes[path_prefix] = cached
+ return self._changes[path_prefix]
def __repr__(self):
return '' % (
self.commit.id, self.changes())
class _CommitTimeQueue(object):
"""Priority queue of WalkEntry objects by commit time."""
def __init__(self, walker):
self._walker = walker
self._store = walker.store
self._get_parents = walker.get_parents
self._excluded = walker.excluded
self._pq = []
self._pq_set = set()
self._seen = set()
self._done = set()
self._min_time = walker.since
self._last = None
self._extra_commits_left = _MAX_EXTRA_COMMITS
self._is_finished = False
for commit_id in chain(walker.include, walker.excluded):
self._push(commit_id)
def _push(self, commit_id):
try:
commit = self._store[commit_id]
except KeyError:
raise MissingCommitError(commit_id)
if commit_id not in self._pq_set and commit_id not in self._done:
heapq.heappush(self._pq, (-commit.commit_time, commit))
self._pq_set.add(commit_id)
self._seen.add(commit_id)
def _exclude_parents(self, commit):
excluded = self._excluded
seen = self._seen
todo = [commit]
while todo:
commit = todo.pop()
for parent in self._get_parents(commit):
if parent not in excluded and parent in seen:
# TODO: This is inefficient unless the object store does
# some caching (which DiskObjectStore currently does not).
# We could either add caching in this class or pass around
# parsed queue entry objects instead of commits.
todo.append(self._store[parent])
excluded.add(parent)
def next(self):
if self._is_finished:
return None
while self._pq:
_, commit = heapq.heappop(self._pq)
sha = commit.id
self._pq_set.remove(sha)
if sha in self._done:
continue
self._done.add(sha)
for parent_id in self._get_parents(commit):
self._push(parent_id)
reset_extra_commits = True
is_excluded = sha in self._excluded
if is_excluded:
self._exclude_parents(commit)
if self._pq and all(c.id in self._excluded
for _, c in self._pq):
_, n = self._pq[0]
if self._last and n.commit_time >= self._last.commit_time:
# If the next commit is newer than the last one, we need
# to keep walking in case its parents (which we may not
# have seen yet) are excluded. This gives the excluded
# set a chance to "catch up" while the commit is still
# in the Walker's output queue.
reset_extra_commits = True
else:
reset_extra_commits = False
if (self._min_time is not None and
commit.commit_time < self._min_time):
# We want to stop walking at min_time, but commits at the
# boundary may be out of order with respect to their parents. So
# we walk _MAX_EXTRA_COMMITS more commits once we hit this
# boundary.
reset_extra_commits = False
if reset_extra_commits:
# We're not at a boundary, so reset the counter.
self._extra_commits_left = _MAX_EXTRA_COMMITS
else:
self._extra_commits_left -= 1
if not self._extra_commits_left:
break
if not is_excluded:
self._last = commit
return WalkEntry(self._walker, commit)
self._is_finished = True
return None
__next__ = next
class Walker(object):
"""Object for performing a walk of commits in a store.
Walker objects are initialized with a store and other options and can then
be treated as iterators of Commit objects.
"""
def __init__(self, store, include, exclude=None, order=ORDER_DATE,
reverse=False, max_entries=None, paths=None,
rename_detector=None, follow=False, since=None, until=None,
get_parents=lambda commit: commit.parents,
queue_cls=_CommitTimeQueue):
"""Constructor.
:param store: ObjectStore instance for looking up objects.
:param include: Iterable of SHAs of commits to include along with their
ancestors.
:param exclude: Iterable of SHAs of commits to exclude along with their
ancestors, overriding includes.
:param order: ORDER_* constant specifying the order of results. Anything
other than ORDER_DATE may result in O(n) memory usage.
:param reverse: If True, reverse the order of output, requiring O(n)
memory.
:param max_entries: The maximum number of entries to yield, or None for
no limit.
:param paths: Iterable of file or subtree paths to show entries for.
:param rename_detector: diff.RenameDetector object for detecting
renames.
:param follow: If True, follow path across renames/copies. Forces a
default rename_detector.
:param since: Timestamp to list commits after.
:param until: Timestamp to list commits before.
:param get_parents: Method to retrieve the parents of a commit
:param queue_cls: A class to use for a queue of commits, supporting the
iterator protocol. The constructor takes a single argument, the
Walker.
"""
# Note: when adding arguments to this method, please also update
# dulwich.repo.BaseRepo.get_walker
if order not in ALL_ORDERS:
raise ValueError('Unknown walk order %s' % order)
self.store = store
if not isinstance(include, list):
include = [include]
self.include = include
self.excluded = set(exclude or [])
self.order = order
self.reverse = reverse
self.max_entries = max_entries
self.paths = paths and set(paths) or None
if follow and not rename_detector:
rename_detector = RenameDetector(store)
self.rename_detector = rename_detector
self.get_parents = get_parents
self.follow = follow
self.since = since
self.until = until
self._num_entries = 0
self._queue = queue_cls(self)
self._out_queue = collections.deque()
def _path_matches(self, changed_path):
if changed_path is None:
return False
for followed_path in self.paths:
if changed_path == followed_path:
return True
if (changed_path.startswith(followed_path) and
changed_path[len(followed_path)] == b'/'[0]):
return True
return False
def _change_matches(self, change):
if not change:
return False
old_path = change.old.path
new_path = change.new.path
if self._path_matches(new_path):
if self.follow and change.type in RENAME_CHANGE_TYPES:
self.paths.add(old_path)
self.paths.remove(new_path)
return True
elif self._path_matches(old_path):
return True
return False
def _should_return(self, entry):
"""Determine if a walk entry should be returned..
:param entry: The WalkEntry to consider.
:return: True if the WalkEntry should be returned by this walk, or False
otherwise (e.g. if it doesn't match any requested paths).
"""
commit = entry.commit
if self.since is not None and commit.commit_time < self.since:
return False
if self.until is not None and commit.commit_time > self.until:
return False
if commit.id in self.excluded:
return False
if self.paths is None:
return True
if len(self.get_parents(commit)) > 1:
for path_changes in entry.changes():
# For merge commits, only include changes with conflicts for
# this path. Since a rename conflict may include different
# old.paths, we have to check all of them.
for change in path_changes:
if self._change_matches(change):
return True
else:
for change in entry.changes():
if self._change_matches(change):
return True
return None
def _next(self):
max_entries = self.max_entries
while max_entries is None or self._num_entries < max_entries:
entry = next(self._queue)
if entry is not None:
self._out_queue.append(entry)
if entry is None or len(self._out_queue) > _MAX_EXTRA_COMMITS:
if not self._out_queue:
return None
entry = self._out_queue.popleft()
if self._should_return(entry):
self._num_entries += 1
return entry
return None
def _reorder(self, results):
"""Possibly reorder a results iterator.
:param results: An iterator of WalkEntry objects, in the order returned
from the queue_cls.
:return: An iterator or list of WalkEntry objects, in the order required
by the Walker.
"""
if self.order == ORDER_TOPO:
results = _topo_reorder(results, self.get_parents)
if self.reverse:
results = reversed(list(results))
return results
def __iter__(self):
return iter(self._reorder(iter(self._next, None)))
def _topo_reorder(entries, get_parents=lambda commit: commit.parents):
"""Reorder an iterable of entries topologically.
This works best assuming the entries are already in almost-topological
order, e.g. in commit time order.
:param entries: An iterable of WalkEntry objects.
:param get_parents: Optional function for getting the parents of a commit.
:return: iterator over WalkEntry objects from entries in FIFO order, except
where a parent would be yielded before any of its children.
"""
todo = collections.deque()
pending = {}
num_children = defaultdict(int)
for entry in entries:
todo.append(entry)
for p in get_parents(entry.commit):
num_children[p] += 1
while todo:
entry = todo.popleft()
commit = entry.commit
commit_id = commit.id
if num_children[commit_id]:
pending[commit_id] = entry
continue
for parent_id in get_parents(commit):
num_children[parent_id] -= 1
if not num_children[parent_id]:
parent_entry = pending.pop(parent_id, None)
if parent_entry:
todo.appendleft(parent_entry)
yield entry