# diff --git a/dulwich/ignore.py b/dulwich/ignore.py
# index b3878938..ab0e92a0 100644
# --- a/dulwich/ignore.py
# +++ b/dulwich/ignore.py
# @@ -1,358 +1,358 @@
# Copyright (C) 2017 Jelmer Vernooij
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Parsing of gitignore files.

For details for the matching rules, see https://git-scm.com/docs/gitignore
"""

import os.path
import re
import sys


def _translate_segment(segment):
    """Translate one slash-free segment of an ignore pattern to a regex.

    :param segment: Pattern segment as bytes (the text between two slashes)
    :return: Regular expression fragment as bytes
    """
    if segment == b"*":
        # A segment consisting only of "*" has to match at least one character.
        return b'[^/]+'
    res = b""
    i, n = 0, len(segment)
    while i < n:
        # Slice instead of indexing so that a bytes object (not an int) is
        # obtained on Python 3 as well.
        c = segment[i:i+1]
        i = i+1
        if c == b'*':
            res += b'[^/]*'
        elif c == b'?':
            # "?" matches exactly one character but never a path separator.
            # The regex is compiled with the DOTALL flag, so a plain "." would
            # also have matched "/".
            res += b'[^/]'
        elif c == b'[':
            # Look for the closing bracket. A "!" and/or "]" directly after
            # the opening bracket belongs to the class and does not close it.
            j = i
            if j < n and segment[j:j+1] == b'!':
                j = j+1
            if j < n and segment[j:j+1] == b']':
                j = j+1
            while j < n and segment[j:j+1] != b']':
                j = j+1
            if j >= n:
                # No closing bracket: treat "[" as a literal character.
                res += b'\\['
            else:
                stuff = segment[i:j].replace(b'\\', b'\\\\')
                i = j+1
                if stuff.startswith(b'!'):
                    # Shell-style negation becomes regex-style negation.
                    stuff = b'^' + stuff[1:]
                elif stuff.startswith(b'^'):
                    # A literal "^" must not be read as a negation.
                    stuff = b'\\' + stuff
                res += b'[' + stuff + b']'
        else:
            res += re.escape(c)
    return res


def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    There is no way to quote meta-characters.

    Originally copied from fnmatch in Python 2.7, but modified for Dulwich
    to cope with features in Git ignore patterns.

    :param pat: Ignore pattern as bytes, without a leading "!" marker
    :return: Regular expression source as bytes
    """
    res = b'(?ms)'

    if b'/' not in pat[:-1]:
        # If there's no slash, this is a filename-based match
        res += b'(.*/)?'

    if pat.startswith(b'**/'):
        # Leading **/
        pat = pat[2:]
        res += b'(.*/)?'

    if pat.startswith(b'/'):
        pat = pat[1:]

    for i, segment in enumerate(pat.split(b'/')):
        if segment == b'**':
            res += b'(/.*)?'
        else:
            res += ((re.escape(b'/') if i > 0 else b'') +
                    _translate_segment(segment))

    if not pat.endswith(b'/'):
        res += b'/?'

    # The backslash is escaped explicitly: "\Z" is not a valid escape
    # sequence in a bytes literal.
    return res + b'\\Z'


def read_ignore_patterns(f):
    """Read a git ignore file.

    Blank lines and lines starting with "#" are skipped. Trailing spaces are
    removed unless the last one is escaped with a backslash.

    :param f: File-like object to read from (iterated as lines of bytes)
    :return: Iterator over the patterns, as bytes
    """
    for line in f:
        line = line.rstrip(b"\r\n")

        # Ignore blank lines, they're used for readability.
        if not line:
            continue

        if line.startswith(b'#'):
            # Comment
            continue

        # Trailing spaces are ignored unless they are quoted with a backslash.
        while line.endswith(b' ') and not line.endswith(b'\\ '):
            line = line[:-1]
        line = line.replace(b'\\ ', b' ')

        yield line


def match_pattern(path, pattern, ignorecase=False):
    """Match a gitignore-style pattern against a path.

    :param path: Path to match, as bytes
    :param pattern: Pattern to match, as bytes
    :param ignorecase: Whether to do case-insensitive matching
    :return: bool indicating whether the pattern matched
    """
    return Pattern(pattern, ignorecase).match(path)


class Pattern(object):
    """A single ignore pattern."""

    def __init__(self, pattern, ignorecase=False):
        """Compile an ignore pattern.

        A leading "!" marks the pattern as a negation (``is_exclude`` is
        False). Otherwise ``is_exclude`` is True, and a single leading
        backslash is dropped before translation.

        :param pattern: Pattern as bytes, exactly as read from the ignore file
        :param ignorecase: Whether matching is case-insensitive
        """
        self.pattern = pattern
        self.ignorecase = ignorecase
        if pattern[0:1] == b'!':
            self.is_exclude = False
            pattern = pattern[1:]
        else:
            if pattern[0:1] == b'\\':
                pattern = pattern[1:]
            self.is_exclude = True
        flags = 0
        if self.ignorecase:
            flags = re.IGNORECASE
        self._re = re.compile(translate(pattern), flags)

    def __bytes__(self):
        return self.pattern

    def __str__(self):
        return self.pattern.decode(sys.getfilesystemencoding())

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.pattern == other.pattern and
                self.ignorecase == other.ignorecase)

    def __hash__(self):
        # Defined together with __eq__: on Python 3 a class that only
        # overrides __eq__ becomes unhashable.
        return hash((type(self), self.pattern, self.ignorecase))

    def __repr__(self):
        return "%s(%r, %r)" % (
            type(self).__name__, self.pattern, self.ignorecase)

    def match(self, path):
        """Try to match a path against this ignore pattern.

        :param path: Path to match (relative to ignore location), as bytes
        :return: boolean
        """
        return bool(self._re.match(path))


class IgnoreFilter(object):
    """An ordered collection of ignore patterns."""

    def __init__(self, patterns, ignorecase=False):
        """Create a filter.

        :param patterns: Iterable of patterns, as bytes
        :param ignorecase: Whether the patterns match case-insensitively
        """
        self._patterns = []
        self._ignorecase = ignorecase
        # Only set to a real path by from_path(); used by __repr__.
        self._path = None
        for pattern in patterns:
            self.append_pattern(pattern)

    def append_pattern(self, pattern):
        """Add a pattern to the set."""
        self._patterns.append(Pattern(pattern, self._ignorecase))

    def find_matching(self, path):
        """Yield all matching patterns for path.

        :param path: Path to match; text paths are encoded with the
            filesystem encoding first
        :return: Iterator over the matching Pattern instances, in the order
            in which they were added
        """
        if not isinstance(path, bytes):
            path = path.encode(sys.getfilesystemencoding())
        for pattern in self._patterns:
            if pattern.match(path):
                yield pattern

    def is_ignored(self, path):
        """Check whether a path is ignored.

        For directories, include a trailing slash.

        :return: None if no pattern matches the path; otherwise the
            ``is_exclude`` value of the last matching pattern: True if that
            pattern ignores the path, False if it is a negated ("!") pattern.
        """
        status = None
        for pattern in self.find_matching(path):
            # Later patterns override earlier ones.
            status = pattern.is_exclude
        return status

    @classmethod
    def from_path(cls, path, ignorecase=False):
        """Create a filter from the ignore file at path.

        :param path: Path of the ignore file to read
        :param ignorecase: Whether the patterns match case-insensitively
        :return: A new filter that remembers the path it was loaded from
        """
        with open(path, 'rb') as f:
            ret = cls(read_ignore_patterns(f), ignorecase)
            ret._path = path
            return ret

    def __repr__(self):
        if getattr(self, '_path', None) is None:
            return "<%s>" % (type(self).__name__)
        else:
            return "%s.from_path(%r)" % (type(self).__name__, self._path)


class IgnoreFilterStack(object):
    """Check for ignore status in multiple filters."""

    def __init__(self, filters):
        self._filters = filters

    def is_ignored(self, path):
        """Check whether a path is explicitly included or excluded in ignores.

        The filters are consulted in order; the first one that has an opinion
        about the path decides.

        :param path: Path to check
        :return: None if no filter mentions the path; otherwise the result of
            the first filter whose ``is_ignored`` is not None.
        """
        status = None
        for ignore_filter in self._filters:
            status = ignore_filter.is_ignored(path)
            if status is not None:
                return status
        return status


def default_user_ignore_filter_path(config):
    """Return default user ignore filter path.

    :param config: A Config object
    :return: The ``core.excludesFile`` value from config if it is set;
        otherwise ``git/ignore`` below ``$XDG_CONFIG_HOME`` (which defaults
        to ``~/.config/``)
    """
    try:
        return config.get((b'core', ), b'excludesFile')
    except KeyError:
        pass

    xdg_config_home = os.environ.get(
        "XDG_CONFIG_HOME", os.path.expanduser("~/.config/"),
    )
    return os.path.join(xdg_config_home, 'git', 'ignore')


class IgnoreFilterManager(object):
    """Ignore file manager."""

    def __init__(self, top_path, global_filters, ignorecase):
        """Create a manager.

        :param top_path: Directory below which ``.gitignore`` files are
            looked up
        :param global_filters: List of filters that apply to every path
        :param ignorecase: Whether ``.gitignore`` patterns match
            case-insensitively
        """
        # Cache of directory path -> IgnoreFilter, or None when that
        # directory has no readable .gitignore file.
        self._path_filters = {}
        self._top_path = top_path
        self._global_filters = global_filters
        self._ignorecase = ignorecase

    def __repr__(self):
        return "%s(%s, %r, %r)" % (
            type(self).__name__, self._top_path,
            self._global_filters,
            self._ignorecase)

    def _load_path(self, path):
        """Return the filter for the .gitignore in a directory, if any.

        :param path: Directory path, relative to the top path
        :return: An IgnoreFilter, or None if the file could not be read
        """
        try:
            return self._path_filters[path]
        except KeyError:
            pass

        p = os.path.join(self._top_path, path, '.gitignore')
        try:
            self._path_filters[path] = IgnoreFilter.from_path(
                p, self._ignorecase)
        except IOError:
            self._path_filters[path] = None
        return self._path_filters[path]

    def find_matching(self, path):
        """Find matching patterns for path.

        Stops after the first ignore file with matches.

        :param path: Path to check, relative to the top path
        :return: Iterator over Pattern instances
        :raise ValueError: If path is absolute
        """
        if os.path.isabs(path):
            raise ValueError('%s is an absolute path' % path)
        # Each entry is (index of the first path component the filter's
        # patterns are relative to, filter).
        filters = [(0, f) for f in self._global_filters]
        if os.path.sep != '/':
            path = path.replace(os.path.sep, '/')
        parts = path.split('/')
        for i in range(len(parts)+1):
            dirname = '/'.join(parts[:i])
            for s, f in filters:
                relpath = '/'.join(parts[s:i])
                if i < len(parts):
                    # Paths leading up to the final part are all directories,
                    # so need a trailing slash.
                    relpath += '/'
                matches = list(f.find_matching(relpath))
                if matches:
                    return iter(matches)
            ignore_filter = self._load_path(dirname)
            if ignore_filter is not None:
                # Filters from deeper directories are consulted first.
                filters.insert(0, (i, ignore_filter))
        return iter([])

    def is_ignored(self, path):
        """Check whether a path is explicitly included or excluded in ignores.

        :param path: Path to check
        :return: None if no pattern matches the path; otherwise the
            ``is_exclude`` value of the last pattern found by find_matching:
            True if that pattern ignores the path, False if it is a negated
            ("!") pattern.
        """
        matches = list(self.find_matching(path))
        if matches:
            return matches[-1].is_exclude
        return None

    @classmethod
    def from_repo(cls, repo):
        """Create a IgnoreFilterManager from a repository.

        The ``info/exclude`` file of the repository and the user's global
        ignore file are loaded as global filters when they can be read.

        :param repo: Repository object
        :return: A `IgnoreFilterManager` object
        """
        config = repo.get_config_stack()
        # Read the setting first so that the global filters honour it in the
        # same way as the per-directory .gitignore files do.
        ignorecase = config.get_boolean((b'core', ), b'ignorecase', False)
        global_filters = []
        for p in [
                os.path.join(repo.controldir(), 'info', 'exclude'),
                default_user_ignore_filter_path(config)]:
            try:
                global_filters.append(IgnoreFilter.from_path(p, ignorecase))
            except IOError:
                # A missing or unreadable ignore file is not an error.
                pass
        return cls(repo.path, global_filters, ignorecase)
# diff --git a/dulwich/tests/compat/test_pack.py b/dulwich/tests/compat/test_pack.py
# index ea75f8cf..46daa68a 100644
# --- a/dulwich/tests/compat/test_pack.py
# +++ b/dulwich/tests/compat/test_pack.py
# @@ -1,156 +1,156 @@
# test_pack.py -- Compatibility tests for git packs.
# Copyright (C) 2010 Google, Inc.
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Compatibility tests for git packs."""


import binascii
import os
import re
import shutil
import tempfile

from dulwich.pack import (
    write_pack,
    )
from dulwich.objects import (
    Blob,
    )
from dulwich.tests import (
    SkipTest,
    )
from dulwich.tests.test_pack import (
    a_sha,
    pack1_sha,
    PackTests,
    )
from dulwich.tests.compat.utils import (
    require_git_version,
    run_git_or_fail,
    )

# Extracts the object count from the "non delta: N objects" summary line of
# `git verify-pack -v`. The group has to be named, because the tests read it
# back with .group('non_delta').
_NON_DELTA_RE = re.compile(b'non delta: (?P<non_delta>\\d+) objects')


def _git_verify_pack_object_list(output):
    """Collect the object ids listed in `git verify-pack -v` output.

    :param output: Output of the command, as bytes
    :return: Set with the first 40 bytes of every line that starts with a
        hex string
    """
    pack_shas = set()
    for line in output.splitlines():
        sha = line[:40]
        try:
            binascii.unhexlify(sha)
        except (TypeError, binascii.Error):
            continue  # non-sha line
        pack_shas.add(sha)
    return pack_shas


class TestPack(PackTests):
    """Compatibility tests for reading and writing pack files."""

    def setUp(self):
        require_git_version((1, 5, 0))
        super(TestPack, self).setUp()
        self._tempdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self._tempdir)

    def test_copy(self):
        """A pack rewritten with write_pack lists the same objects in git."""
        with self.get_pack(pack1_sha) as origpack:
            self.assertSucceeds(origpack.index.check)
            pack_path = os.path.join(self._tempdir, "Elch")
            write_pack(pack_path, origpack.pack_tuples())
            output = run_git_or_fail(['verify-pack', '-v', pack_path])
            orig_shas = set(o.id for o in origpack.iterobjects())
            self.assertEqual(orig_shas, _git_verify_pack_object_list(output))

    def test_deltas_work(self):
        """A pack written with deltify=True is accepted by git."""
        with self.get_pack(pack1_sha) as orig_pack:
            orig_blob = orig_pack[a_sha]
            new_blob = Blob()
            new_blob.data = orig_blob.data + b'x'
            all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None)]
        pack_path = os.path.join(self._tempdir, 'pack_with_deltas')
        write_pack(pack_path, all_to_pack, deltify=True)
        output = run_git_or_fail(['verify-pack', '-v', pack_path])
        self.assertEqual(set(x[0].id for x in all_to_pack),
                         _git_verify_pack_object_list(output))
        # We specifically made a new blob that should be a delta
        # against the blob a_sha, so make sure we really got only 3
        # non-delta objects:
        got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
        self.assertEqual(
            3, got_non_delta,
            'Expected 3 non-delta objects, got %d' % got_non_delta)

    def test_delta_medium_object(self):
        # This tests an object set that will have a copy operation
        # 2**20 in size.
        with self.get_pack(pack1_sha) as orig_pack:
            orig_blob = orig_pack[a_sha]
            new_blob = Blob()
            new_blob.data = orig_blob.data + (b'x' * 2 ** 20)
            new_blob_2 = Blob()
            new_blob_2.data = new_blob.data + b'y'
            all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
                                                           (new_blob_2, None)]
        pack_path = os.path.join(self._tempdir, 'pack_with_deltas')
        write_pack(pack_path, all_to_pack, deltify=True)
        output = run_git_or_fail(['verify-pack', '-v', pack_path])
        self.assertEqual(set(x[0].id for x in all_to_pack),
                         _git_verify_pack_object_list(output))
        # We specifically made a new blob that should be a delta
        # against the blob a_sha, so make sure we really got only 3
        # non-delta objects:
        got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
        self.assertEqual(
            3, got_non_delta,
            'Expected 3 non-delta objects, got %d' % got_non_delta)
        # We expect one object to have a delta chain length of two
        # (new_blob_2), so let's verify that actually happens:
        self.assertIn(b'chain length = 2', output)

    # This test is SUPER slow: over 80 seconds on a 2012-era
    # laptop. This is because SequenceMatcher is worst-case quadratic
    # on the input size. It's impractical to produce deltas for
    # objects this large, but it's still worth doing the right thing
    # when it happens.
    def test_delta_large_object(self):
        # This tests an object set that will have a copy operation
        # 2**25 in size. This is a copy large enough that it requires
        # two copy operations in git's binary delta format.
        raise SkipTest('skipping slow, large test')
        # Everything below is deliberately unreachable while the test is
        # skipped. It uses bytes for the blob data, like the other tests.
        with self.get_pack(pack1_sha) as orig_pack:
            new_blob = Blob()
            new_blob.data = b'big blob' + (b'x' * 2 ** 25)
            new_blob_2 = Blob()
            new_blob_2.data = new_blob.data + b'y'
            all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
                                                           (new_blob_2, None)]
        pack_path = os.path.join(self._tempdir, "pack_with_deltas")
        write_pack(pack_path, all_to_pack, deltify=True)
        output = run_git_or_fail(['verify-pack', '-v', pack_path])
        self.assertEqual(set(x[0].id for x in all_to_pack),
                         _git_verify_pack_object_list(output))
        # We specifically made a new blob that should be a delta
        # against the blob a_sha, so make sure we really got only 4
        # non-delta objects:
        got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
        self.assertEqual(
            4, got_non_delta,
            'Expected 4 non-delta objects, got %d' % got_non_delta)
# diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
# index cc5fbbeb..162b0738 100644
# --- a/dulwich/tests/test_diff_tree.py
# +++ b/dulwich/tests/test_diff_tree.py
# @@ -1,948 +1,948 @@
# test_diff_tree.py -- Tests for file and tree diff utilities.
# Copyright (C) 2010 Google, Inc.
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
# """Tests for file and tree diff utilities.""" from itertools import permutations from dulwich.diff_tree import ( CHANGE_MODIFY, CHANGE_RENAME, CHANGE_COPY, CHANGE_UNCHANGED, TreeChange, _merge_entries, _merge_entries_py, tree_changes, tree_changes_for_merge, _count_blocks, _count_blocks_py, _similarity_score, _tree_change_key, RenameDetector, _is_tree, _is_tree_py ) from dulwich.index import ( commit_tree, ) from dulwich.object_store import ( MemoryObjectStore, ) from dulwich.objects import ( ShaFile, Blob, TreeEntry, Tree, ) from dulwich.tests import ( TestCase, ) from dulwich.tests.utils import ( F, make_object, functest_builder, ext_functest_builder, ) class DiffTestCase(TestCase): def setUp(self): super(DiffTestCase, self).setUp() self.store = MemoryObjectStore() self.empty_tree = self.commit_tree([]) def commit_tree(self, entries): commit_blobs = [] for entry in entries: if len(entry) == 2: path, obj = entry mode = F else: path, obj, mode = entry if isinstance(obj, Blob): self.store.add_object(obj) sha = obj.id else: sha = obj commit_blobs.append((path, sha, mode)) return self.store[commit_tree(self.store, commit_blobs)] class TreeChangesTest(DiffTestCase): def setUp(self): super(TreeChangesTest, self).setUp() self.detector = RenameDetector(self.store) def assertMergeFails(self, merge_entries, name, mode, sha): t = Tree() t[name] = (mode, sha) self.assertRaises((TypeError, ValueError), merge_entries, '', t, t) def _do_test_merge_entries(self, merge_entries): blob_a1 = make_object(Blob, data=b'a1') blob_a2 = make_object(Blob, data=b'a2') blob_b1 = make_object(Blob, data=b'b1') blob_c2 = make_object(Blob, data=b'c2') tree1 = self.commit_tree([(b'a', blob_a1, 0o100644), (b'b', blob_b1, 0o100755)]) tree2 = self.commit_tree([(b'a', blob_a2, 0o100644), (b'c', blob_c2, 0o100755)]) self.assertEqual([], merge_entries(b'', self.empty_tree, self.empty_tree)) self.assertEqual( [((None, None, None), (b'a', 0o100644, blob_a1.id)), ((None, None, None), (b'b', 0o100755, 
blob_b1.id)), ], merge_entries(b'', self.empty_tree, tree1)) self.assertEqual( [((None, None, None), (b'x/a', 0o100644, blob_a1.id)), ((None, None, None), (b'x/b', 0o100755, blob_b1.id)), ], merge_entries(b'x', self.empty_tree, tree1)) self.assertEqual( [((b'a', 0o100644, blob_a2.id), (None, None, None)), ((b'c', 0o100755, blob_c2.id), (None, None, None)), ], merge_entries(b'', tree2, self.empty_tree)) self.assertEqual( [((b'a', 0o100644, blob_a1.id), (b'a', 0o100644, blob_a2.id)), ((b'b', 0o100755, blob_b1.id), (None, None, None)), ((None, None, None), (b'c', 0o100755, blob_c2.id)), ], merge_entries(b'', tree1, tree2)) self.assertEqual( [((b'a', 0o100644, blob_a2.id), (b'a', 0o100644, blob_a1.id)), ((None, None, None), (b'b', 0o100755, blob_b1.id)), ((b'c', 0o100755, blob_c2.id), (None, None, None)), ], merge_entries(b'', tree2, tree1)) self.assertMergeFails(merge_entries, 0xdeadbeef, 0o100644, '1' * 40) self.assertMergeFails(merge_entries, b'a', b'deadbeef', '1' * 40) self.assertMergeFails(merge_entries, b'a', 0o100644, 0xdeadbeef) test_merge_entries = functest_builder(_do_test_merge_entries, _merge_entries_py) test_merge_entries_extension = ext_functest_builder(_do_test_merge_entries, _merge_entries) def _do_test_is_tree(self, is_tree): self.assertFalse(is_tree(TreeEntry(None, None, None))) self.assertFalse(is_tree(TreeEntry(b'a', 0o100644, b'a' * 40))) self.assertFalse(is_tree(TreeEntry(b'a', 0o100755, b'a' * 40))) self.assertFalse(is_tree(TreeEntry(b'a', 0o120000, b'a' * 40))) self.assertTrue(is_tree(TreeEntry(b'a', 0o040000, b'a' * 40))) self.assertRaises(TypeError, is_tree, TreeEntry(b'a', b'x', b'a' * 40)) self.assertRaises(AttributeError, is_tree, 1234) test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py) test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree) def assertChangesEqual(self, expected, tree1, tree2, **kwargs): actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs)) self.assertEqual(expected, actual) # 
For brevity, the following tests use tuples instead of TreeEntry objects. def test_tree_changes_empty(self): self.assertChangesEqual([], self.empty_tree, self.empty_tree) def test_tree_changes_no_changes(self): blob = make_object(Blob, data=b'blob') tree = self.commit_tree([(b'a', blob), (b'b/c', blob)]) self.assertChangesEqual([], self.empty_tree, self.empty_tree) self.assertChangesEqual([], tree, tree) self.assertChangesEqual( [TreeChange(CHANGE_UNCHANGED, (b'a', F, blob.id), (b'a', F, blob.id)), TreeChange(CHANGE_UNCHANGED, (b'b/c', F, blob.id), (b'b/c', F, blob.id))], tree, tree, want_unchanged=True) def test_tree_changes_add_delete(self): blob_a = make_object(Blob, data=b'a') blob_b = make_object(Blob, data=b'b') tree = self.commit_tree([(b'a', blob_a, 0o100644), (b'x/b', blob_b, 0o100755)]) self.assertChangesEqual( [TreeChange.add((b'a', 0o100644, blob_a.id)), TreeChange.add((b'x/b', 0o100755, blob_b.id))], self.empty_tree, tree) self.assertChangesEqual( [TreeChange.delete((b'a', 0o100644, blob_a.id)), TreeChange.delete((b'x/b', 0o100755, blob_b.id))], tree, self.empty_tree) def test_tree_changes_modify_contents(self): blob_a1 = make_object(Blob, data=b'a1') blob_a2 = make_object(Blob, data=b'a2') tree1 = self.commit_tree([(b'a', blob_a1)]) tree2 = self.commit_tree([(b'a', blob_a2)]) self.assertChangesEqual( [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a2.id))], tree1, tree2) def test_tree_changes_modify_mode(self): blob_a = make_object(Blob, data=b'a') tree1 = self.commit_tree([(b'a', blob_a, 0o100644)]) tree2 = self.commit_tree([(b'a', blob_a, 0o100755)]) self.assertChangesEqual( [TreeChange(CHANGE_MODIFY, (b'a', 0o100644, blob_a.id), (b'a', 0o100755, blob_a.id))], tree1, tree2) def test_tree_changes_change_type(self): blob_a1 = make_object(Blob, data=b'a') blob_a2 = make_object(Blob, data=b'/foo/bar') tree1 = self.commit_tree([(b'a', blob_a1, 0o100644)]) tree2 = self.commit_tree([(b'a', blob_a2, 0o120000)]) self.assertChangesEqual( 
[TreeChange.delete((b'a', 0o100644, blob_a1.id)), TreeChange.add((b'a', 0o120000, blob_a2.id))], tree1, tree2) def test_tree_changes_change_type_same(self): blob_a1 = make_object(Blob, data=b'a') blob_a2 = make_object(Blob, data=b'/foo/bar') tree1 = self.commit_tree([(b'a', blob_a1, 0o100644)]) tree2 = self.commit_tree([(b'a', blob_a2, 0o120000)]) self.assertChangesEqual( [TreeChange(CHANGE_MODIFY, (b'a', 0o100644, blob_a1.id), (b'a', 0o120000, blob_a2.id))], tree1, tree2, change_type_same=True) def test_tree_changes_to_tree(self): blob_a = make_object(Blob, data=b'a') blob_x = make_object(Blob, data=b'x') tree1 = self.commit_tree([(b'a', blob_a)]) tree2 = self.commit_tree([(b'a/x', blob_x)]) self.assertChangesEqual( [TreeChange.delete((b'a', F, blob_a.id)), TreeChange.add((b'a/x', F, blob_x.id))], tree1, tree2) def test_tree_changes_complex(self): blob_a_1 = make_object(Blob, data=b'a1_1') blob_bx1_1 = make_object(Blob, data=b'bx1_1') blob_bx2_1 = make_object(Blob, data=b'bx2_1') blob_by1_1 = make_object(Blob, data=b'by1_1') blob_by2_1 = make_object(Blob, data=b'by2_1') tree1 = self.commit_tree([ (b'a', blob_a_1), (b'b/x/1', blob_bx1_1), (b'b/x/2', blob_bx2_1), (b'b/y/1', blob_by1_1), (b'b/y/2', blob_by2_1), ]) blob_a_2 = make_object(Blob, data=b'a1_2') blob_bx1_2 = blob_bx1_1 blob_by_2 = make_object(Blob, data=b'by_2') blob_c_2 = make_object(Blob, data=b'c_2') tree2 = self.commit_tree([ (b'a', blob_a_2), (b'b/x/1', blob_bx1_2), (b'b/y', blob_by_2), (b'c', blob_c_2), ]) self.assertChangesEqual( [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a_1.id), (b'a', F, blob_a_2.id)), TreeChange.delete((b'b/x/2', F, blob_bx2_1.id)), TreeChange.add((b'b/y', F, blob_by_2.id)), TreeChange.delete((b'b/y/1', F, blob_by1_1.id)), TreeChange.delete((b'b/y/2', F, blob_by2_1.id)), TreeChange.add((b'c', F, blob_c_2.id))], tree1, tree2) def test_tree_changes_name_order(self): blob = make_object(Blob, data=b'a') tree1 = self.commit_tree([(b'a', blob), (b'a.', blob), (b'a..', blob)]) # Tree 
order is the reverse of this, so if we used tree order, 'a..' # would not be merged. tree2 = self.commit_tree( [(b'a/x', blob), (b'a./x', blob), (b'a..', blob)]) self.assertChangesEqual( [TreeChange.delete((b'a', F, blob.id)), TreeChange.add((b'a/x', F, blob.id)), TreeChange.delete((b'a.', F, blob.id)), TreeChange.add((b'a./x', F, blob.id))], tree1, tree2) def test_tree_changes_prune(self): blob_a1 = make_object(Blob, data=b'a1') blob_a2 = make_object(Blob, data=b'a2') blob_x = make_object(Blob, data=b'x') tree1 = self.commit_tree([(b'a', blob_a1), (b'b/x', blob_x)]) tree2 = self.commit_tree([(b'a', blob_a2), (b'b/x', blob_x)]) # Remove identical items so lookups will fail unless we prune. subtree = self.store[tree1[b'b'][1]] for entry in subtree.items(): del self.store[entry.sha] del self.store[subtree.id] self.assertChangesEqual( [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a2.id))], tree1, tree2) def test_tree_changes_rename_detector(self): blob_a1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob_a2 = make_object(Blob, data=b'a\nb\nc\ne\n') blob_b = make_object(Blob, data=b'b') tree1 = self.commit_tree([(b'a', blob_a1), (b'b', blob_b)]) tree2 = self.commit_tree([(b'c', blob_a2), (b'b', blob_b)]) detector = RenameDetector(self.store) self.assertChangesEqual( [TreeChange.delete((b'a', F, blob_a1.id)), TreeChange.add((b'c', F, blob_a2.id))], tree1, tree2) self.assertChangesEqual( [TreeChange.delete((b'a', F, blob_a1.id)), TreeChange(CHANGE_UNCHANGED, (b'b', F, blob_b.id), (b'b', F, blob_b.id)), TreeChange.add((b'c', F, blob_a2.id))], tree1, tree2, want_unchanged=True) self.assertChangesEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id), (b'c', F, blob_a2.id))], tree1, tree2, rename_detector=detector) self.assertChangesEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id), (b'c', F, blob_a2.id)), TreeChange(CHANGE_UNCHANGED, (b'b', F, blob_b.id), (b'b', F, blob_b.id))], tree1, tree2, rename_detector=detector, want_unchanged=True) def 
assertChangesForMergeEqual(self, expected, parent_trees, merge_tree, **kwargs): parent_tree_ids = [t.id for t in parent_trees] actual = list(tree_changes_for_merge( self.store, parent_tree_ids, merge_tree.id, **kwargs)) self.assertEqual(expected, actual) parent_tree_ids.reverse() expected = [list(reversed(cs)) for cs in expected] actual = list(tree_changes_for_merge( self.store, parent_tree_ids, merge_tree.id, **kwargs)) self.assertEqual(expected, actual) def test_tree_changes_for_merge_add_no_conflict(self): blob = make_object(Blob, data=b'blob') parent1 = self.commit_tree([]) parent2 = merge = self.commit_tree([(b'a', blob)]) self.assertChangesForMergeEqual([], [parent1, parent2], merge) self.assertChangesForMergeEqual([], [parent2, parent2], merge) def test_tree_changes_for_merge_add_modify_conflict(self): blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'2') parent1 = self.commit_tree([]) parent2 = self.commit_tree([(b'a', blob1)]) merge = self.commit_tree([(b'a', blob2)]) self.assertChangesForMergeEqual( [[TreeChange.add((b'a', F, blob2.id)), TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob2.id))]], [parent1, parent2], merge) def test_tree_changes_for_merge_modify_modify_conflict(self): blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'2') blob3 = make_object(Blob, data=b'3') parent1 = self.commit_tree([(b'a', blob1)]) parent2 = self.commit_tree([(b'a', blob2)]) merge = self.commit_tree([(b'a', blob3)]) self.assertChangesForMergeEqual( [[TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob3.id)), TreeChange(CHANGE_MODIFY, (b'a', F, blob2.id), (b'a', F, blob3.id))]], [parent1, parent2], merge) def test_tree_changes_for_merge_modify_no_conflict(self): blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'2') parent1 = self.commit_tree([(b'a', blob1)]) parent2 = merge = self.commit_tree([(b'a', blob2)]) self.assertChangesForMergeEqual([], [parent1, parent2], merge) def 
test_tree_changes_for_merge_delete_delete_conflict(self): blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'2') parent1 = self.commit_tree([(b'a', blob1)]) parent2 = self.commit_tree([(b'a', blob2)]) merge = self.commit_tree([]) self.assertChangesForMergeEqual( [[TreeChange.delete((b'a', F, blob1.id)), TreeChange.delete((b'a', F, blob2.id))]], [parent1, parent2], merge) def test_tree_changes_for_merge_delete_no_conflict(self): blob = make_object(Blob, data=b'blob') has = self.commit_tree([(b'a', blob)]) doesnt_have = self.commit_tree([]) self.assertChangesForMergeEqual([], [has, has], doesnt_have) self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have) def test_tree_changes_for_merge_octopus_no_conflict(self): r = list(range(5)) blobs = [make_object(Blob, data=bytes(i)) for i in r] parents = [self.commit_tree([(b'a', blobs[i])]) for i in r] for i in r: # Take the SHA from each of the parents. self.assertChangesForMergeEqual([], parents, parents[i]) def test_tree_changes_for_merge_octopus_modify_conflict(self): # Because the octopus merge strategy is limited, I doubt it's possible # to create this with the git command line. But the output is well- # defined, so test it anyway. 
r = list(range(5)) parent_blobs = [make_object(Blob, data=bytes(i)) for i in r] merge_blob = make_object(Blob, data=b'merge') parents = [self.commit_tree([(b'a', parent_blobs[i])]) for i in r] merge = self.commit_tree([(b'a', merge_blob)]) expected = [[TreeChange(CHANGE_MODIFY, (b'a', F, parent_blobs[i].id), (b'a', F, merge_blob.id)) for i in r]] self.assertChangesForMergeEqual(expected, parents, merge) def test_tree_changes_for_merge_octopus_delete(self): blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'3') parent1 = self.commit_tree([(b'a', blob1)]) parent2 = self.commit_tree([(b'a', blob2)]) parent3 = merge = self.commit_tree([]) self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge) self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge) self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge) self.assertChangesForMergeEqual( [[TreeChange.delete((b'a', F, blob1.id)), TreeChange.delete((b'a', F, blob2.id)), None]], [parent1, parent2, parent3], merge) def test_tree_changes_for_merge_add_add_same_conflict(self): blob = make_object(Blob, data=b'a\nb\nc\nd\n') parent1 = self.commit_tree([(b'a', blob)]) parent2 = self.commit_tree([]) merge = self.commit_tree([(b'b', blob)]) add = TreeChange.add((b'b', F, blob.id)) self.assertChangesForMergeEqual( [[add, add]], [parent1, parent2], merge) def test_tree_changes_for_merge_add_exact_rename_conflict(self): blob = make_object(Blob, data=b'a\nb\nc\nd\n') parent1 = self.commit_tree([(b'a', blob)]) parent2 = self.commit_tree([]) merge = self.commit_tree([(b'b', blob)]) self.assertChangesForMergeEqual( [[TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'b', F, blob.id)), TreeChange.add((b'b', F, blob.id))]], [parent1, parent2], merge, rename_detector=self.detector) def test_tree_changes_for_merge_add_content_rename_conflict(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'a\nb\nc\ne\n') parent1 = self.commit_tree([(b'a', 
blob1)]) parent2 = self.commit_tree([]) merge = self.commit_tree([(b'b', blob2)]) self.assertChangesForMergeEqual( [[TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob2.id)), TreeChange.add((b'b', F, blob2.id))]], [parent1, parent2], merge, rename_detector=self.detector) def test_tree_changes_for_merge_modify_rename_conflict(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'a\nb\nc\ne\n') parent1 = self.commit_tree([(b'a', blob1)]) parent2 = self.commit_tree([(b'b', blob1)]) merge = self.commit_tree([(b'b', blob2)]) self.assertChangesForMergeEqual( [[TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob2.id)), TreeChange(CHANGE_MODIFY, (b'b', F, blob1.id), (b'b', F, blob2.id))]], [parent1, parent2], merge, rename_detector=self.detector) class RenameDetectionTest(DiffTestCase): def _do_test_count_blocks(self, count_blocks): blob = make_object(Blob, data=b'a\nb\na\n') self.assertEqual({hash(b'a\n'): 4, hash(b'b\n'): 2}, count_blocks(blob)) test_count_blocks = functest_builder(_do_test_count_blocks, _count_blocks_py) test_count_blocks_extension = ext_functest_builder(_do_test_count_blocks, _count_blocks) def _do_test_count_blocks_no_newline(self, count_blocks): blob = make_object(Blob, data=b'a\na') self.assertEqual({hash(b'a\n'): 2, hash(b'a'): 1}, _count_blocks(blob)) test_count_blocks_no_newline = functest_builder( _do_test_count_blocks_no_newline, _count_blocks_py) test_count_blocks_no_newline_extension = ext_functest_builder( _do_test_count_blocks_no_newline, _count_blocks) def _do_test_count_blocks_chunks(self, count_blocks): blob = ShaFile.from_raw_chunks(Blob.type_num, [b'a\nb', b'\na\n']) self.assertEqual({hash(b'a\n'): 4, hash(b'b\n'): 2}, _count_blocks(blob)) test_count_blocks_chunks = functest_builder(_do_test_count_blocks_chunks, _count_blocks_py) test_count_blocks_chunks_extension = ext_functest_builder( _do_test_count_blocks_chunks, _count_blocks) def _do_test_count_blocks_long_lines(self, 
count_blocks): a = b'a' * 64 data = a + b'xxx\ny\n' + a + b'zzz\n' blob = make_object(Blob, data=data) self.assertEqual({hash(b'a' * 64): 128, hash(b'xxx\n'): 4, hash(b'y\n'): 2, hash(b'zzz\n'): 4}, _count_blocks(blob)) test_count_blocks_long_lines = functest_builder( _do_test_count_blocks_long_lines, _count_blocks_py) test_count_blocks_long_lines_extension = ext_functest_builder( _do_test_count_blocks_long_lines, _count_blocks) def assertSimilar(self, expected_score, blob1, blob2): self.assertEqual(expected_score, _similarity_score(blob1, blob2)) self.assertEqual(expected_score, _similarity_score(blob2, blob1)) def test_similarity_score(self): blob0 = make_object(Blob, data=b'') blob1 = make_object(Blob, data=b'ab\ncd\ncd\n') blob2 = make_object(Blob, data=b'ab\n') blob3 = make_object(Blob, data=b'cd\n') blob4 = make_object(Blob, data=b'cd\ncd\n') self.assertSimilar(100, blob0, blob0) self.assertSimilar(0, blob0, blob1) self.assertSimilar(33, blob1, blob2) self.assertSimilar(33, blob1, blob3) self.assertSimilar(66, blob1, blob4) self.assertSimilar(0, blob2, blob3) self.assertSimilar(50, blob3, blob4) def test_similarity_score_cache(self): blob1 = make_object(Blob, data=b'ab\ncd\n') blob2 = make_object(Blob, data=b'ab\n') block_cache = {} self.assertEqual( 50, _similarity_score(blob1, blob2, block_cache=block_cache)) self.assertEqual(set([blob1.id, blob2.id]), set(block_cache)) def fail_chunks(): self.fail('Unexpected call to as_raw_chunks()') blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks blob1.raw_length = lambda: 6 blob2.raw_length = lambda: 3 self.assertEqual( 50, _similarity_score(blob1, blob2, block_cache=block_cache)) def test_tree_entry_sort(self): sha = 'abcd' * 10 expected_entries = [ TreeChange.add(TreeEntry(b'aaa', F, sha)), TreeChange(CHANGE_COPY, TreeEntry(b'bbb', F, sha), TreeEntry(b'aab', F, sha)), TreeChange(CHANGE_MODIFY, TreeEntry(b'bbb', F, sha), TreeEntry(b'bbb', F, b'dabc' * 10)), TreeChange(CHANGE_RENAME, TreeEntry(b'bbc', F, sha), 
TreeEntry(b'ddd', F, sha)), TreeChange.delete(TreeEntry(b'ccc', F, sha)), ] for perm in permutations(expected_entries): self.assertEqual(expected_entries, sorted(perm, key=_tree_change_key)) def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs): detector = RenameDetector(self.store, **kwargs) return detector.changes_with_renames(tree1.id, tree2.id, want_unchanged=want_unchanged) def test_no_renames(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'a\nb\ne\nf\n') blob3 = make_object(Blob, data=b'a\nb\ng\nh\n') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) tree2 = self.commit_tree([(b'a', blob1), (b'b', blob3)]) self.assertEqual( [TreeChange(CHANGE_MODIFY, (b'b', F, blob2.id), (b'b', F, blob3.id))], self.detect_renames(tree1, tree2)) def test_exact_rename_one_to_one(self): blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'2') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) tree2 = self.commit_tree([(b'c', blob1), (b'd', blob2)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'c', F, blob1.id)), TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'd', F, blob2.id))], self.detect_renames(tree1, tree2)) def test_exact_rename_split_different_type(self): blob = make_object(Blob, data=b'/foo') tree1 = self.commit_tree([(b'a', blob, 0o100644)]) tree2 = self.commit_tree([(b'a', blob, 0o120000)]) self.assertEqual( [TreeChange.add((b'a', 0o120000, blob.id)), TreeChange.delete((b'a', 0o100644, blob.id))], self.detect_renames(tree1, tree2)) def test_exact_rename_and_different_type(self): blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'2') tree1 = self.commit_tree([(b'a', blob1)]) tree2 = self.commit_tree([(b'a', blob2, 0o120000), (b'b', blob1)]) self.assertEqual( [TreeChange.add((b'a', 0o120000, blob2.id)), TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob1.id))], self.detect_renames(tree1, tree2)) def 
test_exact_rename_one_to_many(self): blob = make_object(Blob, data=b'1') tree1 = self.commit_tree([(b'a', blob)]) tree2 = self.commit_tree([(b'b', blob), (b'c', blob)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'b', F, blob.id)), TreeChange(CHANGE_COPY, (b'a', F, blob.id), (b'c', F, blob.id))], self.detect_renames(tree1, tree2)) def test_exact_rename_many_to_one(self): blob = make_object(Blob, data=b'1') tree1 = self.commit_tree([(b'a', blob), (b'b', blob)]) tree2 = self.commit_tree([(b'c', blob)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'c', F, blob.id)), TreeChange.delete((b'b', F, blob.id))], self.detect_renames(tree1, tree2)) def test_exact_rename_many_to_many(self): blob = make_object(Blob, data=b'1') tree1 = self.commit_tree([(b'a', blob), (b'b', blob)]) tree2 = self.commit_tree([(b'c', blob), (b'd', blob), (b'e', blob)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'c', F, blob.id)), TreeChange(CHANGE_COPY, (b'a', F, blob.id), (b'e', F, blob.id)), TreeChange(CHANGE_RENAME, (b'b', F, blob.id), (b'd', F, blob.id))], self.detect_renames(tree1, tree2)) def test_exact_copy_modify(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'a\nb\nc\ne\n') tree1 = self.commit_tree([(b'a', blob1)]) tree2 = self.commit_tree([(b'a', blob2), (b'b', blob1)]) self.assertEqual( [TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob2.id)), TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob1.id))], self.detect_renames(tree1, tree2)) def test_exact_copy_change_mode(self): blob = make_object(Blob, data=b'a\nb\nc\nd\n') tree1 = self.commit_tree([(b'a', blob)]) tree2 = self.commit_tree([(b'a', blob, 0o100755), (b'b', blob)]) self.assertEqual( [TreeChange(CHANGE_MODIFY, (b'a', F, blob.id), (b'a', 0o100755, blob.id)), TreeChange(CHANGE_COPY, (b'a', F, blob.id), (b'b', F, blob.id))], self.detect_renames(tree1, tree2)) def test_rename_threshold(self): blob1 = 
make_object(Blob, data=b'a\nb\nc\n') blob2 = make_object(Blob, data=b'a\nb\nd\n') tree1 = self.commit_tree([(b'a', blob1)]) tree2 = self.commit_tree([(b'b', blob2)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob2.id))], self.detect_renames(tree1, tree2, rename_threshold=50)) self.assertEqual( [TreeChange.delete((b'a', F, blob1.id)), TreeChange.add((b'b', F, blob2.id))], self.detect_renames(tree1, tree2, rename_threshold=75)) def test_content_rename_max_files(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd') blob4 = make_object(Blob, data=b'a\nb\nc\ne\n') blob2 = make_object(Blob, data=b'e\nf\ng\nh\n') blob3 = make_object(Blob, data=b'e\nf\ng\ni\n') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) tree2 = self.commit_tree([(b'c', blob3), (b'd', blob4)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'd', F, blob4.id)), TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'c', F, blob3.id))], self.detect_renames(tree1, tree2)) self.assertEqual( [TreeChange.delete((b'a', F, blob1.id)), TreeChange.delete((b'b', F, blob2.id)), TreeChange.add((b'c', F, blob3.id)), TreeChange.add((b'd', F, blob4.id))], self.detect_renames(tree1, tree2, max_files=1)) def test_content_rename_one_to_one(self): b11 = make_object(Blob, data=b'a\nb\nc\nd\n') b12 = make_object(Blob, data=b'a\nb\nc\ne\n') - b21 = make_object(Blob, data=b'e\nf\ng\n\h') - b22 = make_object(Blob, data=b'e\nf\ng\n\i') + b21 = make_object(Blob, data=b'e\nf\ng\n\nh') + b22 = make_object(Blob, data=b'e\nf\ng\n\ni') tree1 = self.commit_tree([(b'a', b11), (b'b', b21)]) tree2 = self.commit_tree([(b'c', b12), (b'd', b22)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, b11.id), (b'c', F, b12.id)), TreeChange(CHANGE_RENAME, (b'b', F, b21.id), (b'd', F, b22.id))], self.detect_renames(tree1, tree2)) def test_content_rename_one_to_one_ordering(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\ne\nf\n') blob2 = make_object(Blob, data=b'a\nb\nc\nd\ng\nh\n') # 
6/10 match to blob1, 8/10 match to blob2 blob3 = make_object(Blob, data=b'a\nb\nc\nd\ng\ni\n') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) tree2 = self.commit_tree([(b'c', blob3)]) self.assertEqual( [TreeChange.delete((b'a', F, blob1.id)), TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'c', F, blob3.id))], self.detect_renames(tree1, tree2)) tree3 = self.commit_tree([(b'a', blob2), (b'b', blob1)]) tree4 = self.commit_tree([(b'c', blob3)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob2.id), (b'c', F, blob3.id)), TreeChange.delete((b'b', F, blob1.id))], self.detect_renames(tree3, tree4)) def test_content_rename_one_to_many(self): blob1 = make_object(Blob, data=b'aa\nb\nc\nd\ne\n') blob2 = make_object(Blob, data=b'ab\nb\nc\nd\ne\n') # 8/11 match blob3 = make_object(Blob, data=b'aa\nb\nc\nd\nf\n') # 9/11 match tree1 = self.commit_tree([(b'a', blob1)]) tree2 = self.commit_tree([(b'b', blob2), (b'c', blob3)]) self.assertEqual( [TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob2.id)), TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'c', F, blob3.id))], self.detect_renames(tree1, tree2)) def test_content_rename_many_to_one(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'a\nb\nc\ne\n') blob3 = make_object(Blob, data=b'a\nb\nc\nf\n') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) tree2 = self.commit_tree([(b'c', blob3)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'c', F, blob3.id)), TreeChange.delete((b'b', F, blob2.id))], self.detect_renames(tree1, tree2)) def test_content_rename_many_to_many(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'a\nb\nc\ne\n') blob3 = make_object(Blob, data=b'a\nb\nc\nf\n') blob4 = make_object(Blob, data=b'a\nb\nc\ng\n') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) tree2 = self.commit_tree([(b'c', blob3), (b'd', blob4)]) # TODO(dborowitz): Distribute renames rather than greedily 
choosing # copies. self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'c', F, blob3.id)), TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'd', F, blob4.id)), TreeChange.delete((b'b', F, blob2.id))], self.detect_renames(tree1, tree2)) def test_content_rename_with_more_deletions(self): blob1 = make_object(Blob, data=b'') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob1), (b'c', blob1), (b'd', blob1)]) tree2 = self.commit_tree([(b'e', blob1), (b'f', blob1), (b'g', blob1)]) self.maxDiff = None self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'e', F, blob1.id)), TreeChange(CHANGE_RENAME, (b'b', F, blob1.id), (b'f', F, blob1.id)), TreeChange(CHANGE_RENAME, (b'c', F, blob1.id), (b'g', F, blob1.id)), TreeChange.delete((b'd', F, blob1.id))], self.detect_renames(tree1, tree2)) def test_content_rename_gitlink(self): blob1 = make_object(Blob, data=b'blob1') blob2 = make_object(Blob, data=b'blob2') link1 = b'1' * 40 link2 = b'2' * 40 tree1 = self.commit_tree([(b'a', blob1), (b'b', link1, 0o160000)]) tree2 = self.commit_tree([(b'c', blob2), (b'd', link2, 0o160000)]) self.assertEqual( [TreeChange.delete((b'a', 0o100644, blob1.id)), TreeChange.delete((b'b', 0o160000, link1)), TreeChange.add((b'c', 0o100644, blob2.id)), TreeChange.add((b'd', 0o160000, link2))], self.detect_renames(tree1, tree2)) def test_exact_rename_swap(self): blob1 = make_object(Blob, data=b'1') blob2 = make_object(Blob, data=b'2') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) tree2 = self.commit_tree([(b'a', blob2), (b'b', blob1)]) self.assertEqual( [TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob2.id)), TreeChange(CHANGE_MODIFY, (b'b', F, blob2.id), (b'b', F, blob1.id))], self.detect_renames(tree1, tree2)) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob1.id)), TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'a', F, blob2.id))], self.detect_renames(tree1, tree2, rewrite_threshold=50)) def 
test_content_rename_swap(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'e\nf\ng\nh\n') blob3 = make_object(Blob, data=b'a\nb\nc\ne\n') blob4 = make_object(Blob, data=b'e\nf\ng\ni\n') tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) tree2 = self.commit_tree([(b'a', blob4), (b'b', blob3)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob3.id)), TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'a', F, blob4.id))], self.detect_renames(tree1, tree2, rewrite_threshold=60)) def test_rewrite_threshold(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'a\nb\nc\ne\n') blob3 = make_object(Blob, data=b'a\nb\nf\ng\n') tree1 = self.commit_tree([(b'a', blob1)]) tree2 = self.commit_tree([(b'a', blob3), (b'b', blob2)]) no_renames = [ TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob3.id)), TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob2.id))] self.assertEqual( no_renames, self.detect_renames(tree1, tree2)) self.assertEqual( no_renames, self.detect_renames( tree1, tree2, rewrite_threshold=40)) self.assertEqual( [TreeChange.add((b'a', F, blob3.id)), TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob2.id))], self.detect_renames(tree1, tree2, rewrite_threshold=80)) def test_find_copies_harder_exact(self): blob = make_object(Blob, data=b'blob') tree1 = self.commit_tree([(b'a', blob)]) tree2 = self.commit_tree([(b'a', blob), (b'b', blob)]) self.assertEqual([TreeChange.add((b'b', F, blob.id))], self.detect_renames(tree1, tree2)) self.assertEqual( [TreeChange(CHANGE_COPY, (b'a', F, blob.id), (b'b', F, blob.id))], self.detect_renames(tree1, tree2, find_copies_harder=True)) def test_find_copies_harder_content(self): blob1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob2 = make_object(Blob, data=b'a\nb\nc\ne\n') tree1 = self.commit_tree([(b'a', blob1)]) tree2 = self.commit_tree([(b'a', blob1), (b'b', blob2)]) 
self.assertEqual([TreeChange.add((b'b', F, blob2.id))], self.detect_renames(tree1, tree2)) self.assertEqual( [TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob2.id))], self.detect_renames(tree1, tree2, find_copies_harder=True)) def test_find_copies_harder_with_rewrites(self): blob_a1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob_a2 = make_object(Blob, data=b'f\ng\nh\ni\n') blob_b2 = make_object(Blob, data=b'a\nb\nc\ne\n') tree1 = self.commit_tree([(b'a', blob_a1)]) tree2 = self.commit_tree([(b'a', blob_a2), (b'b', blob_b2)]) self.assertEqual( [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a2.id)), TreeChange(CHANGE_COPY, (b'a', F, blob_a1.id), (b'b', F, blob_b2.id))], self.detect_renames(tree1, tree2, find_copies_harder=True)) self.assertEqual( [TreeChange.add((b'a', F, blob_a2.id)), TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id), (b'b', F, blob_b2.id))], self.detect_renames(tree1, tree2, rewrite_threshold=50, find_copies_harder=True)) def test_reuse_detector(self): blob = make_object(Blob, data=b'blob') tree1 = self.commit_tree([(b'a', blob)]) tree2 = self.commit_tree([(b'b', blob)]) detector = RenameDetector(self.store) changes = [TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'b', F, blob.id))] self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id)) self.assertEqual(changes, detector.changes_with_renames(tree1.id, tree2.id)) def test_want_unchanged(self): blob_a1 = make_object(Blob, data=b'a\nb\nc\nd\n') blob_b = make_object(Blob, data=b'b') blob_c2 = make_object(Blob, data=b'a\nb\nc\ne\n') tree1 = self.commit_tree([(b'a', blob_a1), (b'b', blob_b)]) tree2 = self.commit_tree([(b'c', blob_c2), (b'b', blob_b)]) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id), (b'c', F, blob_c2.id))], self.detect_renames(tree1, tree2)) self.assertEqual( [TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id), (b'c', F, blob_c2.id)), TreeChange(CHANGE_UNCHANGED, (b'b', F, blob_b.id), (b'b', F, blob_b.id))], 
self.detect_renames(tree1, tree2, want_unchanged=True)) diff --git a/dulwich/tests/test_ignore.py b/dulwich/tests/test_ignore.py index 339e8ad1..ade0e8a1 100644 --- a/dulwich/tests/test_ignore.py +++ b/dulwich/tests/test_ignore.py @@ -1,260 +1,260 @@ # test_ignore.py -- Tests for ignore files. # Copyright (C) 2017 Jelmer Vernooij # # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU # General Public License as public by the Free Software Foundation; version 2.0 # or (at your option) any later version. You can redistribute it and/or # modify it under the terms of either of these two licenses. # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # You should have received a copy of the licenses; if not, see # for a copy of the GNU General Public License # and for a copy of the Apache # License, Version 2.0. 
# """Tests for ignore files.""" from io import BytesIO import os import re import shutil import tempfile from dulwich.tests import TestCase from dulwich.ignore import ( IgnoreFilter, IgnoreFilterManager, IgnoreFilterStack, Pattern, match_pattern, read_ignore_patterns, translate, ) from dulwich.repo import Repo POSITIVE_MATCH_TESTS = [ (b"foo.c", b"*.c"), (b".c", b"*.c"), (b"foo/foo.c", b"*.c"), (b"foo/foo.c", b"foo.c"), (b"foo.c", b"/*.c"), (b"foo.c", b"/foo.c"), (b"foo.c", b"foo.c"), (b"foo.c", b"foo.[ch]"), (b"foo/bar/bla.c", b"foo/**"), (b"foo/bar/bla/blie.c", b"foo/**/blie.c"), (b"foo/bar/bla.c", b"**/bla.c"), (b"bla.c", b"**/bla.c"), (b"foo/bar", b"foo/**/bar"), (b"foo/bla/bar", b"foo/**/bar"), (b"foo/bar/", b"bar/"), (b"foo/bar/", b"bar"), (b"foo/bar/something", b"foo/bar/*"), ] NEGATIVE_MATCH_TESTS = [ (b"foo.c", b"foo.[dh]"), (b"foo/foo.c", b"/foo.c"), (b"foo/foo.c", b"/*.c"), (b"foo/bar/", b"/bar/"), (b"foo/bar/", b"foo/bar/*"), ] TRANSLATE_TESTS = [ (b"*.c", b'(?ms)(.*/)?[^/]*\\.c/?\\Z'), (b"foo.c", b'(?ms)(.*/)?foo\\.c/?\\Z'), (b"/*.c", b'(?ms)[^/]*\\.c/?\\Z'), (b"/foo.c", b'(?ms)foo\\.c/?\\Z'), (b"foo.c", b'(?ms)(.*/)?foo\\.c/?\\Z'), (b"foo.[ch]", b'(?ms)(.*/)?foo\\.[ch]/?\\Z'), (b"bar/", b'(?ms)(.*/)?bar\\/\\Z'), (b"foo/**", b'(?ms)foo(/.*)?/?\\Z'), (b"foo/**/blie.c", b'(?ms)foo(/.*)?\\/blie\\.c/?\\Z'), (b"**/bla.c", b'(?ms)(.*/)?bla\\.c/?\\Z'), (b"foo/**/bar", b'(?ms)foo(/.*)?\\/bar/?\\Z'), (b"foo/bar/*", b'(?ms)foo\\/bar\\/[^/]+/?\\Z'), ] class TranslateTests(TestCase): def test_translate(self): for (pattern, regex) in TRANSLATE_TESTS: if re.escape(b'/') == b'/': # Slash is no longer escaped in Python3.7, so undo the escaping # in the expected return value.. 
regex = regex.replace(b'\\/', b'/') self.assertEqual( regex, translate(pattern), "orig pattern: %r, regex: %r, expected: %r" % (pattern, translate(pattern), regex)) class ReadIgnorePatterns(TestCase): def test_read_file(self): f = BytesIO(b""" # a comment # and an empty line: -\#not a comment +\\#not a comment !negative with trailing whitespace -with escaped trailing whitespace\ +with escaped trailing whitespace\\ """) # noqa: W291 self.assertEqual(list(read_ignore_patterns(f)), [ b'\\#not a comment', b'!negative', b'with trailing whitespace', b'with escaped trailing whitespace ' ]) class MatchPatternTests(TestCase): def test_matches(self): for (path, pattern) in POSITIVE_MATCH_TESTS: self.assertTrue( match_pattern(path, pattern), "path: %r, pattern: %r" % (path, pattern)) def test_no_matches(self): for (path, pattern) in NEGATIVE_MATCH_TESTS: self.assertFalse( match_pattern(path, pattern), "path: %r, pattern: %r" % (path, pattern)) class IgnoreFilterTests(TestCase): def test_included(self): filter = IgnoreFilter([b'a.c', b'b.c']) self.assertTrue(filter.is_ignored(b'a.c')) self.assertIs(None, filter.is_ignored(b'c.c')) self.assertEqual( [Pattern(b'a.c')], list(filter.find_matching(b'a.c'))) self.assertEqual( [], list(filter.find_matching(b'c.c'))) def test_included_ignorecase(self): filter = IgnoreFilter([b'a.c', b'b.c'], ignorecase=False) self.assertTrue(filter.is_ignored(b'a.c')) self.assertFalse(filter.is_ignored(b'A.c')) filter = IgnoreFilter([b'a.c', b'b.c'], ignorecase=True) self.assertTrue(filter.is_ignored(b'a.c')) self.assertTrue(filter.is_ignored(b'A.c')) self.assertTrue(filter.is_ignored(b'A.C')) def test_excluded(self): filter = IgnoreFilter([b'a.c', b'b.c', b'!c.c']) self.assertFalse(filter.is_ignored(b'c.c')) self.assertIs(None, filter.is_ignored(b'd.c')) self.assertEqual( [Pattern(b'!c.c')], list(filter.find_matching(b'c.c'))) self.assertEqual([], list(filter.find_matching(b'd.c'))) def test_include_exclude_include(self): filter = 
IgnoreFilter([b'a.c', b'!a.c', b'a.c']) self.assertTrue(filter.is_ignored(b'a.c')) self.assertEqual( [Pattern(b'a.c'), Pattern(b'!a.c'), Pattern(b'a.c')], list(filter.find_matching(b'a.c'))) def test_manpage(self): # A specific example from the gitignore manpage filter = IgnoreFilter([ b'/*', b'!/foo', b'/foo/*', b'!/foo/bar']) self.assertTrue(filter.is_ignored(b'a.c')) self.assertTrue(filter.is_ignored(b'foo/blie')) self.assertFalse(filter.is_ignored(b'foo')) self.assertFalse(filter.is_ignored(b'foo/bar')) self.assertFalse(filter.is_ignored(b'foo/bar/')) self.assertFalse(filter.is_ignored(b'foo/bar/bloe')) class IgnoreFilterStackTests(TestCase): def test_stack_first(self): filter1 = IgnoreFilter([b'[a].c', b'[b].c', b'![d].c']) filter2 = IgnoreFilter([b'[a].c', b'![b],c', b'[c].c', b'[d].c']) stack = IgnoreFilterStack([filter1, filter2]) self.assertIs(True, stack.is_ignored(b'a.c')) self.assertIs(True, stack.is_ignored(b'b.c')) self.assertIs(True, stack.is_ignored(b'c.c')) self.assertIs(False, stack.is_ignored(b'd.c')) self.assertIs(None, stack.is_ignored(b'e.c')) class IgnoreFilterManagerTests(TestCase): def test_load_ignore(self): tmp_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, tmp_dir) repo = Repo.init(tmp_dir) with open(os.path.join(repo.path, '.gitignore'), 'wb') as f: f.write(b'/foo/bar\n') f.write(b'/dir2\n') f.write(b'/dir3/\n') os.mkdir(os.path.join(repo.path, 'dir')) with open(os.path.join(repo.path, 'dir', '.gitignore'), 'wb') as f: f.write(b'/blie\n') with open(os.path.join(repo.path, 'dir', 'blie'), 'wb') as f: f.write(b'IGNORED') p = os.path.join(repo.controldir(), 'info', 'exclude') with open(p, 'wb') as f: f.write(b'/excluded\n') m = IgnoreFilterManager.from_repo(repo) self.assertTrue(m.is_ignored('dir/blie')) self.assertIs(None, m.is_ignored(os.path.join('dir', 'bloe'))) self.assertIs(None, m.is_ignored('dir')) self.assertTrue(m.is_ignored(os.path.join('foo', 'bar'))) self.assertTrue(m.is_ignored(os.path.join('excluded'))) 
self.assertTrue(m.is_ignored(os.path.join( 'dir2', 'fileinignoreddir'))) self.assertFalse(m.is_ignored('dir3')) self.assertTrue(m.is_ignored('dir3/')) self.assertTrue(m.is_ignored('dir3/bla')) def test_load_ignore_ignorecase(self): tmp_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, tmp_dir) repo = Repo.init(tmp_dir) config = repo.get_config() config.set(b'core', b'ignorecase', True) config.write_to_path() with open(os.path.join(repo.path, '.gitignore'), 'wb') as f: f.write(b'/foo/bar\n') f.write(b'/dir\n') m = IgnoreFilterManager.from_repo(repo) self.assertTrue(m.is_ignored(os.path.join('dir', 'blie'))) self.assertTrue(m.is_ignored(os.path.join('DIR', 'blie'))) def test_ignored_contents(self): tmp_dir = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, tmp_dir) repo = Repo.init(tmp_dir) with open(os.path.join(repo.path, '.gitignore'), 'wb') as f: f.write(b'a/*\n') f.write(b'!a/*.txt\n') m = IgnoreFilterManager.from_repo(repo) os.mkdir(os.path.join(repo.path, 'a')) self.assertIs(None, m.is_ignored('a')) self.assertIs(None, m.is_ignored('a/')) self.assertFalse(m.is_ignored('a/b.txt')) self.assertTrue(m.is_ignored('a/c.dat'))