# diff --git a/dulwich/ignore.py b/dulwich/ignore.py
# index cc457a22..b75560f3 100644
# --- a/dulwich/ignore.py
# +++ b/dulwich/ignore.py
# @@ -1,391 +1,391 @@
# Copyright (C) 2017 Jelmer Vernooij
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Parsing of gitignore files.

For details for the matching rules, see https://git-scm.com/docs/gitignore
"""

import os.path
import re
from typing import (
    BinaryIO,
    Iterable,
    List,
    Optional,
    TYPE_CHECKING,
    Dict,
    Union,
)

if TYPE_CHECKING:
    from dulwich.repo import Repo

from dulwich.config import get_xdg_config_home_path, Config


def _translate_segment(segment: bytes) -> bytes:
    """Translate one slash-free pattern segment to a regex fragment.

    Args:
      segment: A single path component of a gitignore pattern
    Returns:
      Regular expression fragment (bytes) matching that component. A lone
      ``*`` requires at least one character; ``*`` and ``?`` never match
      a ``/``; ``[...]`` becomes a character class (``!`` negates it) and
      an unterminated ``[`` is matched literally.
    """
    if segment == b"*":
        return b"[^/]+"
    res = b""
    i, n = 0, len(segment)
    while i < n:
        # Slice rather than index so that c is bytes, not an int.
        c = segment[i : i + 1]
        i = i + 1
        if c == b"*":
            res += b"[^/]*"
        elif c == b"?":
            res += b"[^/]"
        elif c == b"[":
            # Scan for the closing bracket; a leading "!" and a "]" that
            # directly follows the opening are part of the class itself.
            j = i
            if j < n and segment[j : j + 1] == b"!":
                j = j + 1
            if j < n and segment[j : j + 1] == b"]":
                j = j + 1
            while j < n and segment[j : j + 1] != b"]":
                j = j + 1
            if j >= n:
                # No closing bracket: treat "[" as a literal character.
                res += b"\\["
            else:
                stuff = segment[i:j].replace(b"\\", b"\\\\")
                i = j + 1
                if stuff.startswith(b"!"):
                    stuff = b"^" + stuff[1:]
                elif stuff.startswith(b"^"):
                    # A literal "^" must not be read as regex negation.
                    stuff = b"\\" + stuff
                res += b"[" + stuff + b"]"
        else:
            res += re.escape(c)
    return res


def translate(pat: bytes) -> bytes:
    """Translate a shell PATTERN to a regular expression.

    There is no way to quote meta-characters.

    Originally copied from fnmatch in Python 2.7, but modified for Dulwich
    to cope with features in Git ignore patterns.

    Args:
      pat: gitignore pattern (without any leading ``!``)
    Returns:
      Regular expression source, as bytes, anchored at the end with ``\\Z``
    """
    res = b"(?ms)"

    if b"/" not in pat[:-1]:
        # If there's no slash, this is a filename-based match
        res += b"(.*/)?"

    if pat.startswith(b"**/"):
        # Leading **/
        pat = pat[2:]
        res += b"(.*/)?"

    if pat.startswith(b"/"):
        pat = pat[1:]

    # re.escape() of "/" differs between Python versions; compute it once.
    escaped_slash = re.escape(b"/")
    for i, segment in enumerate(pat.split(b"/")):
        if segment == b"**":
            res += b"(/.*)?"
            continue
        res += (escaped_slash if i > 0 else b"") + _translate_segment(segment)

    if not pat.endswith(b"/"):
        # Patterns without a trailing slash match files and directories.
        res += b"/?"

    return res + b"\\Z"


def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]:
    """Read a git ignore file.

    Blank and whitespace-only lines and lines starting with ``#`` are
    skipped. Unescaped trailing spaces are removed and ``\\ `` is
    unescaped to a plain space.

    Args:
      f: File-like object to read from
    Returns:
      Iterator over the patterns (they are yielded lazily, so ``f`` must
      stay open while the result is consumed)
    """
    for line in f:
        line = line.rstrip(b"\r\n")

        # Ignore blank lines, they're used for readability. Lines holding
        # only whitespace count as blank too.
        if not line.strip():
            continue

        if line.startswith(b"#"):
            # Comment
            continue

        # Trailing spaces are ignored unless they are quoted with a backslash.
        while line.endswith(b" ") and not line.endswith(b"\\ "):
            line = line[:-1]
        line = line.replace(b"\\ ", b" ")

        yield line


def match_pattern(path: bytes, pattern: bytes, ignorecase: bool = False) -> bool:
    """Match a gitignore-style pattern against a path.

    Args:
      path: Path to match
      pattern: Pattern to match
      ignorecase: Whether to do case-insensitive matching
    Returns:
      bool indicating whether the pattern matched
    """
    return Pattern(pattern, ignorecase).match(path)


class Pattern(object):
    """A single ignore pattern."""

    def __init__(self, pattern: bytes, ignorecase: bool = False):
        """Compile a single gitignore pattern.

        Args:
          pattern: Raw pattern; a leading ``!`` marks a negation and a
            leading backslash escapes the first character
          ignorecase: Whether to match case-insensitively
        """
        self.pattern = pattern
        self.ignorecase = ignorecase
        # is_exclude is True for a regular pattern (matching paths are
        # ignored) and False for a "!" negation.
        if pattern[0:1] == b"!":
            self.is_exclude = False
            pattern = pattern[1:]
        else:
            if pattern[0:1] == b"\\":
                pattern = pattern[1:]
            self.is_exclude = True
        flags = 0
        if self.ignorecase:
            flags = re.IGNORECASE
        self._re = re.compile(translate(pattern), flags)

    def __bytes__(self) -> bytes:
        return self.pattern

    def __str__(self) -> str:
        return os.fsdecode(self.pattern)

    def __eq__(self, other: object) -> bool:
        return (
            isinstance(other, type(self))
            and self.pattern == other.pattern
            and self.ignorecase == other.ignorecase
        )

    def __hash__(self) -> int:
        # Defining __eq__ alone would make instances unhashable; hash the
        # same fields that equality compares.
        return hash((self.pattern, self.ignorecase))

    def __repr__(self) -> str:
        return "%s(%r, %r)" % (
            type(self).__name__,
            self.pattern,
            self.ignorecase,
        )

    def match(self, path: bytes) -> bool:
        """Try to match a path against this ignore pattern.

        Args:
          path: Path to match (relative to ignore location)
        Returns: boolean
        """
        return bool(self._re.match(path))


class IgnoreFilter(object):
    """An ordered collection of ignore patterns, e.g. one ignore file."""

    def __init__(self, patterns: Iterable[bytes], ignorecase: bool = False, path=None):
        """Create a filter.

        Args:
          patterns: Raw patterns, in file order
          ignorecase: Whether to match case-insensitively
          path: Optional path the patterns were read from (used by repr)
        """
        self._patterns = []  # type: List[Pattern]
        self._ignorecase = ignorecase
        self._path = path
        for pattern in patterns:
            self.append_pattern(pattern)

    def append_pattern(self, pattern: bytes) -> None:
        """Add a pattern to the set."""
        self._patterns.append(Pattern(pattern, self._ignorecase))

    def find_matching(self, path: Union[bytes, str]) -> Iterable[Pattern]:
        """Yield all matching patterns for path.

        Args:
          path: Path to match
        Returns:
          Iterator over the matching Pattern instances, in the order the
          patterns were added
        """
        if not isinstance(path, bytes):
            path = os.fsencode(path)
        for pattern in self._patterns:
            if pattern.match(path):
                yield pattern

    def is_ignored(self, path: Union[bytes, str]) -> Optional[bool]:
        """Check whether a path is ignored.

        For directories, include a trailing slash.

        Returns: None if no pattern matches the path; otherwise the
          ``is_exclude`` flag of the last matching pattern: True if the
          path is ignored, False if it was re-included by a ``!`` pattern.
        """
        status = None
        # Later patterns override earlier ones, so keep the last match.
        for pattern in self.find_matching(path):
            status = pattern.is_exclude
        return status

    @classmethod
    def from_path(cls, path, ignorecase: bool = False) -> "IgnoreFilter":
        """Create a filter from the ignore file at ``path``.

        Args:
          path: Path of the ignore file to read
          ignorecase: Whether to match case-insensitively
        Returns:
          A new filter holding the patterns read from the file
        """
        with open(path, "rb") as f:
            return cls(read_ignore_patterns(f), ignorecase, path=path)

    def __repr__(self) -> str:
        path = getattr(self, "_path", None)
        if path is not None:
            return "%s.from_path(%r)" % (type(self).__name__, path)
        else:
            return "<%s>" % (type(self).__name__)


class IgnoreFilterStack(object):
    """Check for ignore status in multiple filters."""

    def __init__(self, filters):
        """Create a stack.

        Args:
          filters: Filters to consult, highest priority first
        """
        self._filters = filters

    def is_ignored(self, path: Union[bytes, str]) -> Optional[bool]:
        """Check whether a path is explicitly included or excluded in ignores.

        Args:
          path: Path to check
        Returns:
          The answer of the first filter that mentions the path: True if
          the path is ignored, False if it was re-included by a ``!``
          pattern. None if no filter mentions it.
        """
        for ignore_filter in self._filters:
            status = ignore_filter.is_ignored(path)
            if status is not None:
                return status
        return None


def default_user_ignore_filter_path(config: Config) -> str:
    """Return default user ignore filter path.

    Args:
      config: A Config object
    Returns:
      Path to a global ignore file
    """
    try:
        value = config.get((b"core",), b"excludesFile")
    except KeyError:
        return get_xdg_config_home_path("git", "ignore")
    # The configured value may be bytes; decode it so that the declared
    # str return type holds and callers never mix bytes and str paths.
    if isinstance(value, bytes):
        return os.fsdecode(value)
    return value


class IgnoreFilterManager(object):
    """Ignore file manager."""

    def __init__(
        self,
        top_path: str,
        global_filters: List[IgnoreFilter],
        ignorecase: bool,
    ):
        """Create a manager.

        Args:
          top_path: Directory that relative paths are resolved against
          global_filters: Filters that apply to every path
          ignorecase: Whether per-directory filters match case-insensitively
        """
        # Cache of per-directory filters; None records a missing or
        # unreadable .gitignore so it is not re-read.
        self._path_filters = {}  # type: Dict[str, Optional[IgnoreFilter]]
        self._top_path = top_path
        self._global_filters = global_filters
        self._ignorecase = ignorecase

    def __repr__(self) -> str:
        return "%s(%s, %r, %r)" % (
            type(self).__name__,
            self._top_path,
            self._global_filters,
            self._ignorecase,
        )

    def _load_path(self, path: str) -> Optional[IgnoreFilter]:
        """Return the cached filter for ``<top_path>/<path>/.gitignore``.

        Args:
          path: Directory, relative to the top path
        Returns:
          The filter, or None if the file could not be read
        """
        try:
            return self._path_filters[path]
        except KeyError:
            pass

        p = os.path.join(self._top_path, path, ".gitignore")
        try:
            self._path_filters[path] = IgnoreFilter.from_path(p, self._ignorecase)
        except IOError:
            self._path_filters[path] = None
        return self._path_filters[path]

    def find_matching(self, path: str) -> Iterable[Pattern]:
        """Find matching patterns for path.

        Args:
          path: Path to check
        Returns:
          Iterator over Pattern instances
        Raises:
          ValueError: if path is absolute
        """
        if os.path.isabs(path):
            raise ValueError("%s is an absolute path" % path)
        # Each entry pairs a filter with the number of leading path parts
        # to strip before matching against it.
        filters = [(0, f) for f in self._global_filters]
        if os.path.sep != "/":
            path = path.replace(os.path.sep, "/")
        parts = path.split("/")
        matches = []
        for i in range(len(parts) + 1):
            dirname = "/".join(parts[:i])
            for s, f in filters:
                relpath = "/".join(parts[s:i])
                if i < len(parts):
                    # Paths leading up to the final part are all directories,
                    # so need a trailing slash.
                    relpath += "/"
                matches.extend(f.find_matching(relpath))
            ignore_filter = self._load_path(dirname)
            if ignore_filter is not None:
                # Filters from deeper directories are consulted first.
                filters.insert(0, (i, ignore_filter))
        return iter(matches)

    def is_ignored(self, path: str) -> Optional[bool]:
        """Check whether a path is explicitly included or excluded in ignores.

        Args:
          path: Path to check
        Returns:
          None if no pattern matches the path; otherwise the ``is_exclude``
          flag of the last pattern returned by find_matching: True if the
          path is ignored, False if it was re-included by a ``!`` pattern.
        """
        matches = list(self.find_matching(path))
        if matches:
            return matches[-1].is_exclude
        return None

    @classmethod
    def from_repo(cls, repo: "Repo") -> "IgnoreFilterManager":
        """Create a IgnoreFilterManager from a repository.

        Args:
          repo: Repository object
        Returns:
          A `IgnoreFilterManager` object
        """
        config = repo.get_config_stack()
        ignorecase = config.get_boolean((b"core",), b"ignorecase", False)
        global_filters = []
        for p in [
            os.path.join(repo.controldir(), "info", "exclude"),
            default_user_ignore_filter_path(config),
        ]:
            try:
                # Global filters honour core.ignorecase just like the
                # per-directory .gitignore filters do.
                global_filters.append(
                    IgnoreFilter.from_path(os.path.expanduser(p), ignorecase)
                )
            except IOError:
                # These files are optional; a missing one is not an error.
                pass
        return cls(repo.path, global_filters, ignorecase)


# diff --git a/dulwich/tests/test_ignore.py b/dulwich/tests/test_ignore.py
# index 1e947beb..6cb5c661 100644
# --- a/dulwich/tests/test_ignore.py
# +++ b/dulwich/tests/test_ignore.py
# @@ -1,272 +1,272 @@
# test_ignore.py -- Tests for ignore files.
# Copyright (C) 2017 Jelmer Vernooij
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Tests for ignore files."""

from io import BytesIO
import os
import re
import shutil
import tempfile
from dulwich.tests import TestCase

from dulwich.ignore import (
    IgnoreFilter,
    IgnoreFilterManager,
    IgnoreFilterStack,
    Pattern,
    match_pattern,
    read_ignore_patterns,
    translate,
)
from dulwich.repo import Repo


# (path, pattern) pairs where the pattern must match the path.
POSITIVE_MATCH_TESTS = [
    (b"foo.c", b"*.c"),
    (b".c", b"*.c"),
    (b"foo/foo.c", b"*.c"),
    (b"foo/foo.c", b"foo.c"),
    (b"foo.c", b"/*.c"),
    (b"foo.c", b"/foo.c"),
    (b"foo.c", b"foo.c"),
    (b"foo.c", b"foo.[ch]"),
    (b"foo/bar/bla.c", b"foo/**"),
    (b"foo/bar/bla/blie.c", b"foo/**/blie.c"),
    (b"foo/bar/bla.c", b"**/bla.c"),
    (b"bla.c", b"**/bla.c"),
    (b"foo/bar", b"foo/**/bar"),
    (b"foo/bla/bar", b"foo/**/bar"),
    (b"foo/bar/", b"bar/"),
    (b"foo/bar/", b"bar"),
    (b"foo/bar/something", b"foo/bar/*"),
]

# (path, pattern) pairs where the pattern must NOT match the path.
NEGATIVE_MATCH_TESTS = [
    (b"foo.c", b"foo.[dh]"),
    (b"foo/foo.c", b"/foo.c"),
    (b"foo/foo.c", b"/*.c"),
    (b"foo/bar/", b"/bar/"),
    (b"foo/bar/", b"foo/bar/*"),
    (b"foo/bar", b"foo?bar"),
]


# (pattern, expected regex) pairs for translate().
TRANSLATE_TESTS = [
    (b"*.c", b"(?ms)(.*/)?[^/]*\\.c/?\\Z"),
    (b"foo.c", b"(?ms)(.*/)?foo\\.c/?\\Z"),
    (b"/*.c", b"(?ms)[^/]*\\.c/?\\Z"),
    (b"/foo.c", b"(?ms)foo\\.c/?\\Z"),
    (b"foo.[ch]", b"(?ms)(.*/)?foo\\.[ch]/?\\Z"),
    (b"bar/", b"(?ms)(.*/)?bar\\/\\Z"),
    (b"foo/**", b"(?ms)foo(/.*)?/?\\Z"),
    (b"foo/**/blie.c", b"(?ms)foo(/.*)?\\/blie\\.c/?\\Z"),
    (b"**/bla.c", b"(?ms)(.*/)?bla\\.c/?\\Z"),
    (b"foo/**/bar", b"(?ms)foo(/.*)?\\/bar/?\\Z"),
    (b"foo/bar/*", b"(?ms)foo\\/bar\\/[^/]+/?\\Z"),
]


class TranslateTests(TestCase):
    """Tests for translate()."""

    def test_translate(self):
        for (pattern, regex) in TRANSLATE_TESTS:
            if re.escape(b"/") == b"/":
                # Slash is no longer escaped in Python3.7, so undo the escaping
                # in the expected return value..
                regex = regex.replace(b"\\/", b"/")
            self.assertEqual(
                regex,
                translate(pattern),
                "orig pattern: %r, regex: %r, expected: %r"
                % (pattern, translate(pattern), regex),
            )


class ReadIgnorePatterns(TestCase):
    """Tests for read_ignore_patterns()."""

    def test_read_file(self):
        # Each input line is spelled out separately so that the significant
        # trailing whitespace is visible and survives editors that strip it.
        f = BytesIO(
            b"\n"
            b"# a comment\n"
            b"\x20\x20\n"
            b"# and an empty line:\n"
            b"\n"
            b"\\#not a comment\n"
            b"!negative\n"
            b"with trailing whitespace \n"
            b"with escaped trailing whitespace\\ \n"
        )
        self.assertEqual(
            list(read_ignore_patterns(f)),
            [
                b"\\#not a comment",
                b"!negative",
                b"with trailing whitespace",
                b"with escaped trailing whitespace ",
            ],
        )


class MatchPatternTests(TestCase):
    """Tests for match_pattern()."""

    def test_matches(self):
        for (path, pattern) in POSITIVE_MATCH_TESTS:
            self.assertTrue(
                match_pattern(path, pattern),
                "path: %r, pattern: %r" % (path, pattern),
            )

    def test_no_matches(self):
        for (path, pattern) in NEGATIVE_MATCH_TESTS:
            self.assertFalse(
                match_pattern(path, pattern),
                "path: %r, pattern: %r" % (path, pattern),
            )


class IgnoreFilterTests(TestCase):
    """Tests for IgnoreFilter."""

    def test_included(self):
        ignore_filter = IgnoreFilter([b"a.c", b"b.c"])
        self.assertTrue(ignore_filter.is_ignored(b"a.c"))
        self.assertIs(None, ignore_filter.is_ignored(b"c.c"))
        self.assertEqual(
            [Pattern(b"a.c")], list(ignore_filter.find_matching(b"a.c"))
        )
        self.assertEqual([], list(ignore_filter.find_matching(b"c.c")))

    def test_included_ignorecase(self):
        ignore_filter = IgnoreFilter([b"a.c", b"b.c"], ignorecase=False)
        self.assertTrue(ignore_filter.is_ignored(b"a.c"))
        self.assertFalse(ignore_filter.is_ignored(b"A.c"))
        ignore_filter = IgnoreFilter([b"a.c", b"b.c"], ignorecase=True)
        self.assertTrue(ignore_filter.is_ignored(b"a.c"))
        self.assertTrue(ignore_filter.is_ignored(b"A.c"))
        self.assertTrue(ignore_filter.is_ignored(b"A.C"))

    def test_excluded(self):
        ignore_filter = IgnoreFilter([b"a.c", b"b.c", b"!c.c"])
        self.assertFalse(ignore_filter.is_ignored(b"c.c"))
        self.assertIs(None, ignore_filter.is_ignored(b"d.c"))
        self.assertEqual(
            [Pattern(b"!c.c")], list(ignore_filter.find_matching(b"c.c"))
        )
        self.assertEqual([], list(ignore_filter.find_matching(b"d.c")))

    def test_include_exclude_include(self):
        ignore_filter = IgnoreFilter([b"a.c", b"!a.c", b"a.c"])
        self.assertTrue(ignore_filter.is_ignored(b"a.c"))
        self.assertEqual(
            [Pattern(b"a.c"), Pattern(b"!a.c"), Pattern(b"a.c")],
            list(ignore_filter.find_matching(b"a.c")),
        )

    def test_manpage(self):
        # A specific example from the gitignore manpage
        ignore_filter = IgnoreFilter([b"/*", b"!/foo", b"/foo/*", b"!/foo/bar"])
        self.assertTrue(ignore_filter.is_ignored(b"a.c"))
        self.assertTrue(ignore_filter.is_ignored(b"foo/blie"))
        self.assertFalse(ignore_filter.is_ignored(b"foo"))
        self.assertFalse(ignore_filter.is_ignored(b"foo/bar"))
        self.assertFalse(ignore_filter.is_ignored(b"foo/bar/"))
        self.assertFalse(ignore_filter.is_ignored(b"foo/bar/bloe"))


class IgnoreFilterStackTests(TestCase):
    """Tests for IgnoreFilterStack."""

    def test_stack_first(self):
        filter1 = IgnoreFilter([b"[a].c", b"[b].c", b"![d].c"])
        filter2 = IgnoreFilter([b"[a].c", b"![b],c", b"[c].c", b"[d].c"])
        stack = IgnoreFilterStack([filter1, filter2])
        self.assertIs(True, stack.is_ignored(b"a.c"))
        self.assertIs(True, stack.is_ignored(b"b.c"))
        self.assertIs(True, stack.is_ignored(b"c.c"))
        self.assertIs(False, stack.is_ignored(b"d.c"))
        self.assertIs(None, stack.is_ignored(b"e.c"))


class IgnoreFilterManagerTests(TestCase):
    """Tests for IgnoreFilterManager."""

    def _init_repo(self):
        """Create a repository in a temporary directory, cleaned up after the test."""
        tmp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, tmp_dir)
        repo = Repo.init(tmp_dir)
        # Cleanups run in reverse order, so the repository is closed
        # before its directory is removed.
        self.addCleanup(repo.close)
        return repo

    def test_load_ignore(self):
        repo = self._init_repo()
        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
            f.write(b"/foo/bar\n")
            f.write(b"/dir2\n")
            f.write(b"/dir3/\n")
        os.mkdir(os.path.join(repo.path, "dir"))
        with open(os.path.join(repo.path, "dir", ".gitignore"), "wb") as f:
            f.write(b"/blie\n")
        with open(os.path.join(repo.path, "dir", "blie"), "wb") as f:
            f.write(b"IGNORED")
        p = os.path.join(repo.controldir(), "info", "exclude")
        with open(p, "wb") as f:
            f.write(b"/excluded\n")
        m = IgnoreFilterManager.from_repo(repo)
        self.assertTrue(m.is_ignored("dir/blie"))
        self.assertIs(None, m.is_ignored(os.path.join("dir", "bloe")))
        self.assertIs(None, m.is_ignored("dir"))
        self.assertTrue(m.is_ignored(os.path.join("foo", "bar")))
        self.assertTrue(m.is_ignored(os.path.join("excluded")))
        self.assertTrue(m.is_ignored(os.path.join("dir2", "fileinignoreddir")))
        self.assertFalse(m.is_ignored("dir3"))
        self.assertTrue(m.is_ignored("dir3/"))
        self.assertTrue(m.is_ignored("dir3/bla"))

    def test_nested_gitignores(self):
        repo = self._init_repo()

        with open(os.path.join(repo.path, '.gitignore'), 'wb') as f:
            f.write(b'/*\n')
            f.write(b'!/foo\n')

        os.mkdir(os.path.join(repo.path, 'foo'))
        with open(os.path.join(repo.path, 'foo', '.gitignore'), 'wb') as f:
            f.write(b'/bar\n')

        with open(os.path.join(repo.path, 'foo', 'bar'), 'wb') as f:
            f.write(b'IGNORED')

        m = IgnoreFilterManager.from_repo(repo)
        self.assertTrue(m.is_ignored('foo/bar'))

    def test_load_ignore_ignorecase(self):
        repo = self._init_repo()
        config = repo.get_config()
        config.set(b"core", b"ignorecase", True)
        config.write_to_path()
        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
            f.write(b"/foo/bar\n")
            f.write(b"/dir\n")
        m = IgnoreFilterManager.from_repo(repo)
        self.assertTrue(m.is_ignored(os.path.join("dir", "blie")))
        self.assertTrue(m.is_ignored(os.path.join("DIR", "blie")))

    def test_ignored_contents(self):
        repo = self._init_repo()
        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
            f.write(b"a/*\n")
            f.write(b"!a/*.txt\n")
        m = IgnoreFilterManager.from_repo(repo)
        os.mkdir(os.path.join(repo.path, "a"))
        self.assertIs(None, m.is_ignored("a"))
        self.assertIs(None, m.is_ignored("a/"))
        self.assertFalse(m.is_ignored("a/b.txt"))
        self.assertTrue(m.is_ignored("a/c.dat"))