From 6763d4772bb06965d774aaab2dde5d82f33855a3 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 27 Aug 2022 18:27:32 +0300 Subject: [PATCH] Lots of fixes, performance improvements and refactorings. --- gitignorefile/__init__.py | 175 ++++++++++------- setup.py | 2 +- tests/test_cache.py | 21 +- tests/test_ignored.py | 14 +- tests/test_match.py | 398 +++++++++++++++++++++++--------------- 5 files changed, 377 insertions(+), 233 deletions(-) diff --git a/gitignorefile/__init__.py b/gitignorefile/__init__.py index 8fe3801..5769841 100644 --- a/gitignorefile/__init__.py +++ b/gitignorefile/__init__.py @@ -3,25 +3,23 @@ import re -def parse(full_path, base_dir=None): - if base_dir is None: - base_dir = os.path.dirname(full_path) +def parse(full_path, base_path=None): + if base_path is None: + base_path = os.path.dirname(full_path) or os.path.dirname(os.path.abspath(full_path)) rules = [] with open(full_path) as ignore_file: for i, line in enumerate(ignore_file, start=1): line = line.rstrip("\r\n") - rule = _rule_from_pattern(line, base_path=os.path.abspath(base_dir), source=(full_path, i)) + rule = _rule_from_pattern(line, source=(full_path, i)) if rule: rules.append(rule) - if not any((r.negation for r in rules)): - return lambda file_path: any((r.match(file_path) for r in rules)) + # TODO probably combine to single regexp. - else: - # We have negation rules. We can't use a simple "any" to evaluate them. - # Later rules override earlier rules. - return lambda file_path: _handle_negation(file_path, rules) + # We have negation rules. We can't use a simple "any" to evaluate them. + # Later rules override earlier rules. + return lambda file_path, is_dir=None: _handle_negation(file_path, rules, base_path=base_path, is_dir=is_dir) def ignore(): @@ -29,16 +27,16 @@ def ignore(): return lambda root, names: {name for name in names if matches(os.path.join(root, name))} -def ignored(path): - return Cache()(path) +def ignored(path, is_dir=None): + return Cache()(path, is_dir=is_dir) class Cache: def __init__(self): self.__gitignores = {} - def __get_parents(self, path): - if not os.path.isdir(path): + def __get_parents(self, path, is_dir): + if not is_dir: path = os.path.dirname(path) yield path @@ -50,15 +48,18 @@ def __get_parents(self, path): else: break - def __call__(self, path): + def __call__(self, path, is_dir=None): + if is_dir is None: + is_dir = os.path.isdir(path) + add_to_children = {} plain_paths = [] - for parent in self.__get_parents(os.path.abspath(path)): + for parent in self.__get_parents(os.path.abspath(path), is_dir=is_dir): if parent in self.__gitignores: break elif os.path.isfile(os.path.join(parent, ".gitignore")): - p = parse(os.path.join(parent, ".gitignore"), base_dir=parent) + p = parse(os.path.join(parent, ".gitignore"), base_path=parent) add_to_children[parent] = (p, plain_paths) plain_paths = [] @@ -83,49 +84,69 @@ def __call__(self, path): for plain_path in parent_plain_paths: self.__gitignores[plain_path] = self.__gitignores[parent] - return any((m(path) for m in self.__gitignores[parent])) # This parent comes either from first or second loop. + return any( + (m(path, is_dir=is_dir) for m in self.__gitignores[parent]) + ) # This parent comes either from first or second loop. + + +def _handle_negation(file_path, rules, base_path=None, is_dir=None): + """ + Because Git allows for nested `.gitignore` files, a `base_path` value + is required for correct behavior. + """ + return_immediately = not any((r.negation for r in rules)) + + if is_dir is None: + is_dir = os.path.isdir(file_path) + if base_path is not None: + rel_path = os.path.relpath(file_path, base_path) + else: + rel_path = file_path + + if rel_path.startswith(f".{os.sep}"): + rel_path = rel_path[2:] -def _handle_negation(file_path, rules): matched = False for rule in rules: - if rule.match(file_path): - if rule.negation: - matched = False - else: - matched = True - return matched + if rule.match(rel_path, is_dir): + matched = not rule.negation + if matched and return_immediately: + return True + + else: + return matched -def _rule_from_pattern(pattern, base_path=None, source=None): +def _rule_from_pattern(pattern, source=None): """ - Take a .gitignore match pattern, such as "*.py[cod]" or "**/*.bak", - and return an _IgnoreRule suitable for matching against files and + Take a `.gitignore` match pattern, such as "*.py[cod]" or "**/*.bak", + and return an `_IgnoreRule` suitable for matching against files and directories. Patterns which do not match files, such as comments - and blank lines, will return None. - Because git allows for nested .gitignore files, a base_path value - is required for correct behavior. The base path should be absolute. + and blank lines, will return `None`. """ - if base_path and base_path != os.path.abspath(base_path): - raise ValueError("base_path must be absolute") # Store the exact pattern for our repr and string functions orig_pattern = pattern + # Early returns follow # Discard comments and separators if not pattern.lstrip() or pattern.lstrip().startswith("#"): return + # Discard anything with more than two consecutive asterisks if "***" in pattern: return + # Strip leading bang before examining double asterisks - if pattern[0] == "!": + if pattern.startswith("!"): negation = True pattern = pattern[1:] else: negation = False + # Discard anything with invalid double-asterisks -- they can appear # at the start or the end, or be surrounded by slashes - for m in re.finditer(r"\*\*", pattern): + for m in re.finditer("\\*\\*", pattern): start_index = m.start() if ( start_index != 0 @@ -139,9 +160,11 @@ def _rule_from_pattern(pattern, base_path=None, source=None): return directory_only = pattern.endswith("/") - # A slash is a sign that we're tied to the base_path of our rule + + # A slash is a sign that we're tied to the `base_path` of our rule # set. anchored = "/" in pattern[:-1] + if pattern.startswith("/"): pattern = pattern[1:] if pattern.startswith("**"): @@ -151,9 +174,11 @@ def _rule_from_pattern(pattern, base_path=None, source=None): pattern = pattern[1:] if pattern.endswith("/"): pattern = pattern[:-1] + # patterns with leading hashes are escaped with a backslash in front, unescape it if pattern.startswith("\\#"): pattern = pattern[1:] + # trailing spaces are ignored unless they are escaped with a backslash i = len(pattern) - 1 striptrailingspaces = True @@ -166,27 +191,28 @@ def _rule_from_pattern(pattern, base_path=None, source=None): if striptrailingspaces: pattern = pattern[:i] i -= 1 - regex = _fnmatch_pathname_to_regex(pattern, directory_only) + + regexp = _fnmatch_pathname_to_regexp(pattern, directory_only) + if anchored: - regex = f"^{regex}" + regexp = f"^{regexp}" + return _IgnoreRule( pattern=orig_pattern, - regex=regex, + regexp=regexp, negation=negation, directory_only=directory_only, anchored=anchored, - base_path=base_path, source=source, ) _IGNORE_RULE_FIELDS = [ "pattern", - "regex", # Basic values + "regexp", # Basic values "negation", "directory_only", "anchored", # Behavior flags - "base_path", # Meaningful for gitignore-style behavior "source", # (file, line) tuple for reporting ] @@ -198,38 +224,39 @@ def __str__(self): def __repr__(self): return "".join(["_IgnoreRule('", self.pattern, "')"]) - def match(self, abs_path): - matched = False - if self.base_path: - rel_path = str(os.path.relpath(abs_path, self.base_path)) - else: - rel_path = str(abs_path) - seps_group, _ = _seps_non_sep_expr() - if rel_path.startswith(f".{seps_group}"): - rel_path = rel_path[2:] - if re.search(self.regex, rel_path): - matched = True - return matched + def match(self, rel_path, is_dir): + match = re.search(self.regexp, rel_path) + + # If we need a directory, check there is something after slash and if there is not, target must be a directory. + # If there is something after slash then it's a directory irrelevant to type of target. + # `self.directory_only` implies we have group number 1. + # N.B. Question mark inside a group without a name can shift indices. :( + return match and (not self.directory_only or match.group(1) is not None or is_dir) def _seps_non_sep_expr(): - seps = [re.escape(os.sep)] - if os.altsep is not None: - seps.append(re.escape(os.altsep)) - return "[" + "|".join(seps) + "]", "[^{}]".format("|".join(seps)) + if os.altsep is None: + seps = re.escape(os.sep) + non_sep = f"[^{re.escape(os.sep)}]" + + else: + seps = f"[{re.escape(os.sep)}{re.escape(os.altsep)}]" + non_sep = f"[^{re.escape(os.sep)}{re.escape(os.altsep)}]" + + return seps, non_sep # Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME # option that `.gitignore`'s behavior depends on. -def _fnmatch_pathname_to_regex(pattern, directory_only): +def _fnmatch_pathname_to_regexp(pattern, directory_only): """ Implements fnmatch style-behavior, as though with FNM_PATHNAME flagged; the path separator will not match shell-style '*' and '.' wildcards. """ i, n = 0, len(pattern) - seps_group, nonsep = _seps_non_sep_expr() - res = [f"(^|{seps_group})"] + seps_group, non_sep = _seps_non_sep_expr() + res = [f"(?:^|{seps_group})"] if pattern else [] # Empty name means no path fragment. while i < n: c = pattern[i] i += 1 @@ -242,13 +269,16 @@ def _fnmatch_pathname_to_regex(pattern, directory_only): i += 1 res.append(f"{seps_group}?") else: - res.append(f"{nonsep}*") + res.append(f"{non_sep}*") except IndexError: - res.append(f"{nonsep}*") + res.append(f"{non_sep}*") + elif c == "?": - res.append(nonsep) + res.append(non_sep) + elif c == "/": res.append(seps_group) + elif c == "[": j = i if j < n and pattern[j] == "!": @@ -257,18 +287,25 @@ def _fnmatch_pathname_to_regex(pattern, directory_only): j += 1 while j < n and pattern[j] != "]": j += 1 + if j >= n: res.append("\\[") else: stuff = pattern[i:j].replace("\\", "\\\\") i = j + 1 if stuff[0] == "!": - stuff = "".join(["^", stuff[1:]]) + stuff = f"^{stuff[1:]}" elif stuff[0] == "^": - stuff = "".join("\\" + stuff) - res.append("[{}]".format(stuff)) + stuff = f"\\{stuff}" + res.append(f"[{stuff}]") + else: res.append(re.escape(c)) - if not directory_only: - res.append(f"({seps_group}|$)") + + if directory_only: # In this case we are interested if there is something after slash. + res.append(f"({seps_group}.+)?$") + + else: + res.append(f"(?:{seps_group}|$)") + return "".join(res) diff --git a/setup.py b/setup.py index e4f2918..9332444 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open(f"{os.path.dirname(os.path.abspath(__file__))}/README.md") as readme: setuptools.setup( name="gitignorefile", - version="1.0.4", + version="1.0.5", description="A spec-compliant `.gitignore` parser for Python", long_description=readme.read(), long_description_content_type="text/markdown", diff --git a/tests/test_cache.py b/tests/test_cache.py index d3a30de..53a061b 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -9,6 +9,9 @@ class TestCache(unittest.TestCase): def test_simple(self): + def normalize_path(path): + return os.path.abspath(path).replace(os.sep, "/") + class StatResult: def __init__(self, is_file=False): self.st_ino = id(self) @@ -19,13 +22,13 @@ class Stat: def __init__(self, directories, files): self.__filesystem = {} for path in directories: - self.__filesystem[path] = StatResult() + self.__filesystem[normalize_path(path)] = StatResult() for path in files: - self.__filesystem[path] = StatResult(True) + self.__filesystem[normalize_path(path)] = StatResult(True) def __call__(self, path): try: - return self.__filesystem[path] + return self.__filesystem[normalize_path(path)] except KeyError: raise FileNotFoundError() @@ -52,21 +55,19 @@ def __call__(self, path): def mock_open(path): data = { - "/home/vladimir/project/directory/.gitignore": ["file.txt"], - "/home/vladimir/project/.gitignore": ["file2.txt"], + normalize_path("/home/vladimir/project/directory/.gitignore"): ["file.txt"], + normalize_path("/home/vladimir/project/.gitignore"): ["file2.txt"], } statistics["open"] += 1 - path = os.path.abspath(path).replace(os.sep, "/") try: - return unittest.mock.mock_open(read_data="\n".join(data[path]))(path) + return unittest.mock.mock_open(read_data="\n".join(data[normalize_path(path)]))(path) except KeyError: raise FileNotFoundError() def mock_stat(path): statistics["stat"] += 1 - path = os.path.abspath(path).replace(os.sep, "/") return my_stat(path) with unittest.mock.patch("builtins.open", mock_open): @@ -79,7 +80,9 @@ def mock_stat(path): self.assertFalse(matches("/home/vladimir/project/file.txt")) self.assertEqual(statistics["open"], 2) - self.assertEqual(statistics["stat"], 6 * (2 + 1) + 5) + + # On Windows and Python 3.7 `os.path.isdir()` does not use `os.stat`. See `Modules/getpath.c`. + self.assertIn(statistics["stat"], (6 * (2 + 1) + 5, 6 * (2 + 1))) def test_wrong_symlink(self): with tempfile.TemporaryDirectory() as d: diff --git a/tests/test_ignored.py b/tests/test_ignored.py index 5956649..72e422d 100644 --- a/tests/test_ignored.py +++ b/tests/test_ignored.py @@ -6,5 +6,15 @@ class TestIgnored(unittest.TestCase): def test_simple(self): - self.assertFalse(gitignorefile.ignored(__file__)) - self.assertTrue(gitignorefile.ignored(f"{os.path.dirname(__file__)}/__pycache__/some.pyc")) + for is_dir in (None, False, True): + with self.subTest(i=is_dir): + self.assertFalse(gitignorefile.ignored(__file__, is_dir=is_dir)) + if is_dir is not True: + self.assertTrue( + gitignorefile.ignored(f"{os.path.dirname(__file__)}/__pycache__/some.pyc", is_dir=is_dir) + ) + self.assertFalse(gitignorefile.ignored(os.path.dirname(__file__), is_dir=is_dir)) + if is_dir is not False: + self.assertTrue(gitignorefile.ignored(f"{os.path.dirname(__file__)}/__pycache__", is_dir=is_dir)) + else: + self.assertFalse(gitignorefile.ignored(f"{os.path.dirname(__file__)}/__pycache__", is_dir=is_dir)) diff --git a/tests/test_match.py b/tests/test_match.py index ec67ce8..64a7147 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -10,34 +10,42 @@ class TestMatch(unittest.TestCase): def test_simple(self): matches = self.__parse_gitignore_string(["__pycache__/", "*.py[cod]"], fake_base_dir="/home/michael") - self.assertFalse(matches("/home/michael/main.py")) - self.assertTrue(matches("/home/michael/main.pyc")) - self.assertTrue(matches("/home/michael/dir/main.pyc")) - self.assertTrue(matches("/home/michael/__pycache__")) - self.assertTrue(matches("/home/michael/__pycache__/")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/michael/main.py", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/main.pyc", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/dir/main.pyc", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/__pycache__", is_dir=False)) + self.assertTrue(matches("/home/michael/__pycache__", is_dir=True)) def test_simple_without_trailing_slash(self): matches = self.__parse_gitignore_string(["__pycache__", "*.py[cod]"], fake_base_dir="/home/michael") - self.assertFalse(matches("/home/michael/main.py")) - self.assertTrue(matches("/home/michael/main.pyc")) - self.assertTrue(matches("/home/michael/dir/main.pyc")) - self.assertTrue(matches("/home/michael/__pycache__")) - self.assertTrue(matches("/home/michael/__pycache__/")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/michael/main.py", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/main.pyc", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/dir/main.pyc", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/__pycache__", is_dir=is_dir)) def test_wildcard(self): matches = self.__parse_gitignore_string(["hello.*"], fake_base_dir="/home/michael") - self.assertTrue(matches("/home/michael/hello.txt")) - self.assertTrue(matches("/home/michael/hello.foobar/")) - self.assertTrue(matches("/home/michael/dir/hello.txt")) - self.assertTrue(matches("/home/michael/hello.")) - self.assertFalse(matches("/home/michael/hello")) - self.assertFalse(matches("/home/michael/helloX")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/hello.txt", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/hello.foobar", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/dir/hello.txt", is_dir=is_dir)) + if os.name != "nt": # Invalid path on Windows will be normalized in `os.path.relpath`. + self.assertTrue(matches("/home/michael/hello.", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/hello", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/helloX", is_dir=is_dir)) def test_anchored_wildcard(self): matches = self.__parse_gitignore_string(["/hello.*"], fake_base_dir="/home/michael") - self.assertTrue(matches("/home/michael/hello.txt")) - self.assertTrue(matches("/home/michael/hello.c")) - self.assertFalse(matches("/home/michael/a/hello.java")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/hello.txt", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/hello.c", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/a/hello.java", is_dir=is_dir)) def test_trailingspaces(self): matches = self.__parse_gitignore_string( @@ -50,143 +58,221 @@ def test_trailingspaces(self): ], fake_base_dir="/home/michael", ) - self.assertTrue(matches("/home/michael/ignoretrailingspace")) - self.assertFalse(matches("/home/michael/ignoretrailingspace ")) - self.assertTrue(matches("/home/michael/partiallyignoredspace ")) - self.assertFalse(matches("/home/michael/partiallyignoredspace ")) - self.assertFalse(matches("/home/michael/partiallyignoredspace")) - self.assertTrue(matches("/home/michael/partiallyignoredspace2 ")) - self.assertFalse(matches("/home/michael/partiallyignoredspace2 ")) - self.assertFalse(matches("/home/michael/partiallyignoredspace2 ")) - self.assertFalse(matches("/home/michael/partiallyignoredspace2")) - self.assertTrue(matches("/home/michael/notignoredspace ")) - self.assertFalse(matches("/home/michael/notignoredspace")) - self.assertTrue(matches("/home/michael/notignoredmultiplespace ")) - self.assertFalse(matches("/home/michael/notignoredmultiplespace")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/ignoretrailingspace", is_dir=is_dir)) + if os.name != "nt": # Invalid path on Windows will be normalized in `os.path.relpath`. + self.assertFalse(matches("/home/michael/ignoretrailingspace ", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/partiallyignoredspace ", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/partiallyignoredspace ", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/partiallyignoredspace2 ", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/partiallyignoredspace2 ", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/partiallyignoredspace2 ", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/notignoredspace ", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/notignoredmultiplespace ", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/partiallyignoredspace", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/partiallyignoredspace2", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/notignoredspace", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/notignoredmultiplespace", is_dir=is_dir)) def test_comment(self): matches = self.__parse_gitignore_string( ["somematch", "#realcomment", "othermatch", "\\#imnocomment"], fake_base_dir="/home/michael", ) - self.assertTrue(matches("/home/michael/somematch")) - self.assertFalse(matches("/home/michael/#realcomment")) - self.assertTrue(matches("/home/michael/othermatch")) - self.assertTrue(matches("/home/michael/#imnocomment")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/somematch", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/#realcomment", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/othermatch", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/#imnocomment", is_dir=is_dir)) + + def test_second_level_directories(self): + """ + For example, a pattern `doc/frotz/` matches `doc/frotz` directory, but not `a/doc/frotz` directory; + however `frotz/` matches `frotz` and `a/frotz` that is a directory (all paths are relative from the + `.gitignore` file). See https://git-scm.com/docs/gitignore . + """ + matches = self.__parse_gitignore_string(["doc/frotz/"], fake_base_dir="/home/michael") + self.assertFalse(matches("/home/michael/doc/frotz", is_dir=False)) + self.assertTrue(matches("/home/michael/doc/frotz", is_dir=True)) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/michael/a/doc/frotz", is_dir=is_dir)) + + def test_second_level_directories_unchained(self): + matches = self.__parse_gitignore_string(["**/doc/frotz/"], fake_base_dir="/home/michael") + self.assertFalse(matches("/home/michael/doc/frotz", is_dir=False)) + self.assertTrue(matches("/home/michael/doc/frotz", is_dir=True)) + self.assertFalse(matches("/home/michael/a/doc/frotz", is_dir=False)) + self.assertTrue(matches("/home/michael/a/doc/frotz", is_dir=True)) + self.assertFalse(matches("/home/michael/a/b/doc/frotz", is_dir=False)) + self.assertTrue(matches("/home/michael/a/b/doc/frotz", is_dir=True)) + + def test_second_level_files(self): + matches = self.__parse_gitignore_string(["doc/frotz"], fake_base_dir="/home/michael") + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/doc/frotz", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/a/doc/frotz", is_dir=is_dir)) + + def test_ignore_file(self): + matches = self.__parse_gitignore_string([".venv"], fake_base_dir="/home/michael") + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/.venv", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/.venv/folder", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/.venv/file.txt", is_dir=is_dir)) def test_ignore_directory(self): matches = self.__parse_gitignore_string([".venv/"], fake_base_dir="/home/michael") - self.assertTrue(matches("/home/michael/.venv")) - self.assertTrue(matches("/home/michael/.venv/folder")) - self.assertTrue(matches("/home/michael/.venv/file.txt")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/.venv/folder", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/.venv/file.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/.venv", is_dir=False)) + self.assertTrue(matches("/home/michael/.venv", is_dir=True)) + + def test_ignore_directory_greedy(self): + matches = self.__parse_gitignore_string([".venv"], fake_base_dir="/home/michael") + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/michael/.venvlol", is_dir=is_dir)) + + def test_ignore_file_greedy(self): + matches = self.__parse_gitignore_string([".venv/"], fake_base_dir="/home/michael") + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/michael/.venvlol", is_dir=is_dir)) def test_ignore_directory_asterisk(self): matches = self.__parse_gitignore_string([".venv/*"], fake_base_dir="/home/michael") - self.assertFalse(matches("/home/michael/.venv")) - self.assertTrue(matches("/home/michael/.venv/folder")) - self.assertTrue(matches("/home/michael/.venv/file.txt")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/michael/.venv", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/.venv/folder", is_dir=is_dir)) def test_negation(self): matches = self.__parse_gitignore_string( ["*.ignore", "!keep.ignore"], fake_base_dir="/home/michael", ) - self.assertTrue(matches("/home/michael/trash.ignore")) - self.assertFalse(matches("/home/michael/keep.ignore")) - self.assertTrue(matches("/home/michael/waste.ignore")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/trash.ignore", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/keep.ignore", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/waste.ignore", is_dir=is_dir)) def test_double_asterisks(self): matches = self.__parse_gitignore_string(["foo/**/Bar"], fake_base_dir="/home/michael") - self.assertTrue(matches("/home/michael/foo/hello/Bar")) - self.assertTrue(matches("/home/michael/foo/world/Bar")) - self.assertTrue(matches("/home/michael/foo/Bar")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/foo/hello/Bar", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/foo/hello/world/Bar", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/foo/world/Bar", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/foo/Bar", is_dir=is_dir)) def test_single_asterisk(self): matches = self.__parse_gitignore_string(["*"], fake_base_dir="/home/michael") - self.assertTrue(matches("/home/michael/file.txt")) - self.assertTrue(matches("/home/michael/directory")) - self.assertTrue(matches("/home/michael/directory-trailing/")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/file.txt", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/directory/file.txt", is_dir=is_dir)) def test_spurious_matches(self): matches = self.__parse_gitignore_string(["abc"], fake_base_dir="/home/michael") - self.assertFalse(matches("/home/michael/abc.txt")) - self.assertFalse(matches("/home/michael/file-abc.txt")) - self.assertFalse(matches("/home/michael/fileabc")) - self.assertFalse(matches("/home/michael/directoryabc/")) - self.assertFalse(matches("/home/michael/directoryabc-trailing")) - self.assertFalse(matches("/home/michael/directoryabc-trailing/")) - self.assertFalse(matches("/home/michael/abc-suffixed/file.txt")) - self.assertFalse(matches("/home/michael/subdir/abc.txt")) - self.assertFalse(matches("/home/michael/subdir/directoryabc")) - self.assertFalse(matches("/home/michael/subdir/directory-abc-trailing/")) - self.assertFalse(matches("/home/michael/subdir/directory-abc-trailing/file.txt")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/michael/abc.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/file-abc.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/fileabc", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/directoryabc-trailing", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/abc-suffixed/file.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/subdir/abc.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/subdir/directoryabc", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/subdir/directory-abc-trailing", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/subdir/directory-abc-trailing/file.txt", is_dir=is_dir)) def test_does_not_fail_with_symlinks(self): with tempfile.TemporaryDirectory() as d: matches = self.__parse_gitignore_string(["*.venv"], fake_base_dir=d) os.makedirs(f"{d}/.venv/bin") os.symlink(sys.executable, f"{d}/.venv/bin/python") - matches(f"{d}/.venv/bin/python") + self.assertTrue(matches(f"{d}/.venv/bin/python")) def test_single_letter(self): matches = self.__parse_gitignore_string(["a"], fake_base_dir="/home/michael") - self.assertTrue(matches("/home/michael/a")) - self.assertFalse(matches("/home/michael/b")) - self.assertTrue(matches("/home/michael/b/a")) - self.assertTrue(matches("/home/michael/a/b")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/a", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/b", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/b/a", is_dir=is_dir)) + self.assertTrue(matches("/home/michael/a/b", is_dir=is_dir)) + + def test_ignore_all_subdirectories(self): + matches = self.__parse_gitignore_string(["**/"], fake_base_dir="/home/michael") + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/michael/directory/file", is_dir=is_dir)) + self.assertFalse(matches("/home/michael/file.txt", is_dir=False)) + self.assertTrue(matches("/home/michael/directory", is_dir=True)) def test_robert_simple_rules(self): matches = self.__parse_gitignore_string(["__pycache__", "*.py[cod]", ".venv/"], fake_base_dir="/home/robert") - self.assertFalse(matches("/home/robert/main.py")) - self.assertTrue(matches("/home/robert/dir/main.pyc")) - self.assertTrue(matches("/home/robert/__pycache__")) - self.assertTrue(matches("/home/robert/.venv")) - self.assertTrue(matches("/home/robert/.venv/")) - self.assertTrue(matches("/home/robert/.venv/folder")) - self.assertTrue(matches("/home/robert/.venv/file.txt")) - self.assertTrue(matches("/home/robert/.venv/folder/file.txt")) - self.assertTrue(matches("/home/robert/.venv/folder/folder")) - self.assertTrue(matches("/home/robert/.venv/folder/folder/")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertFalse(matches("/home/robert/main.py", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/dir/main.pyc", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/__pycache__", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/.venv/folder", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/.venv/file.txt", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/.venv/folder/file.txt", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/.venv/folder/folder", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/.venv", is_dir=True)) + self.assertFalse(matches("/home/robert/.venv", is_dir=False)) def test_robert_comments(self): matches = self.__parse_gitignore_string( ["somematch", "#realcomment", "othermatch", "\\#imnocomment"], fake_base_dir="/home/robert" ) - self.assertTrue(matches("/home/robert/somematch")) - self.assertFalse(matches("/home/robert/#realcomment")) - self.assertTrue(matches("/home/robert/othermatch")) - self.assertFalse(matches("/home/robert")) - self.assertFalse(matches("/home/robert/")) - self.assertFalse(matches("/home/robert/\\")) - self.assertTrue(matches("/home/robert/#imnocomment")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/robert/somematch", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/#realcomment", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/othermatch", is_dir=is_dir)) + self.assertFalse(matches("/home/robert", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/\\", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/#imnocomment", is_dir=is_dir)) def test_robert_wildcard(self): matches = self.__parse_gitignore_string(["hello.*"], fake_base_dir="/home/robert") - self.assertTrue(matches("/home/robert/hello.txt")) - self.assertTrue(matches("/home/robert/hello.foobar")) - self.assertTrue(matches("/home/robert/hello.foobar/")) - self.assertTrue(matches("/home/robert/dir/hello.txt")) - self.assertFalse(matches("/home/robert/dir/shello.txt")) - - self.assertTrue( - matches("/home/robert/dir/hello.") - ) # FIXME On Windows there can be no files ending with a point? - - self.assertFalse(matches("/home/robert/dir/hello")) - self.assertFalse(matches("/home/robert/dir/helloX")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/robert/hello.txt", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/dir/hello.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/dir/shello.txt", is_dir=is_dir)) + if os.name != "nt": # Invalid path on Windows will be normalized in `os.path.relpath`. + self.assertTrue(matches("/home/robert/dir/hello.", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/dir/hello", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/dir/helloX", is_dir=is_dir)) def test_robert_anchored_wildcard(self): matches = self.__parse_gitignore_string(["/hello.*"], fake_base_dir="/home/robert") - self.assertTrue(matches("/home/robert/hello.txt")) - self.assertTrue(matches("/home/robert/hello.c")) - self.assertFalse(matches("/home/robert/a/hello.java")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/robert/hello.txt", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/hello.c", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/a/hello.java", is_dir=is_dir)) def test_robert_negation_rules(self): matches = self.__parse_gitignore_string(["*.ignore", "!keep.ignore"], fake_base_dir="/home/robert") - self.assertTrue(matches("/home/robert/trash.ignore")) - self.assertTrue(matches("/home/robert/whatever.ignore")) - self.assertFalse(matches("/home/robert/keep.ignore")) - self.assertTrue(matches("/home/robert/!keep.ignore")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/robert/trash.ignore", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/whatever.ignore", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/keep.ignore", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/!keep.ignore", is_dir=is_dir)) def test_robert_match_does_not_resolve_symlinks(self): """Test match on files under symlinked directories @@ -198,21 +284,23 @@ def test_robert_match_does_not_resolve_symlinks(self): matches = self.__parse_gitignore_string(["*.venv"], fake_base_dir=d) os.makedirs(f"{d}/.venv/bin") os.symlink(sys.executable, f"{d}/.venv/bin/python") - self.assertTrue(matches(f"{d}/.venv")) - self.assertTrue(matches(f"{d}/.venv/")) - self.assertTrue(matches(f"{d}/.venv/bin")) - self.assertTrue(matches(f"{d}/.venv/bin/")) - self.assertTrue(matches(f"{d}/.venv/bin/python")) - self.assertFalse(matches(f"{d}/.venv2")) - self.assertFalse(matches(f"{d}/.venv2/")) - self.assertFalse(matches(f"{d}/.venv2/bin")) - self.assertFalse(matches(f"{d}/.venv2/bin/")) - self.assertFalse(matches(f"{d}/.venv2/bin/python")) - self.assertTrue(matches(f"{d}/a.venv")) - self.assertTrue(matches(f"{d}/a.venv/")) - self.assertTrue(matches(f"{d}/a.venv/bin")) - self.assertTrue(matches(f"{d}/a.venv/bin/")) - self.assertTrue(matches(f"{d}/a.venv/bin/python")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches(f"{d}/.venv", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/.venv/", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/.venv/bin", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/.venv/bin/", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/.venv/bin/python", is_dir=is_dir)) + self.assertFalse(matches(f"{d}/.venv2", is_dir=is_dir)) + self.assertFalse(matches(f"{d}/.venv2/", is_dir=is_dir)) + self.assertFalse(matches(f"{d}/.venv2/bin", is_dir=is_dir)) + self.assertFalse(matches(f"{d}/.venv2/bin/", is_dir=is_dir)) + self.assertFalse(matches(f"{d}/.venv2/bin/python", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/a.venv", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/a.venv/", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/a.venv/bin", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/a.venv/bin/", is_dir=is_dir)) + self.assertTrue(matches(f"{d}/a.venv/bin/python", is_dir=is_dir)) def test_robert_match_files_under_symlink(self): # FIXME What's going on? @@ -226,20 +314,23 @@ def test_robert_match_files_under_symlink(self): def test_robert_handle_base_directories_with_a_symlink_in_their_components(self): """ - see https://github.com/bitranox/igittigitt/issues/28 + See https://github.com/bitranox/igittigitt/issues/28 . """ with tempfile.TemporaryDirectory() as d: os.makedirs(f"{d}/igittigitt01") os.symlink(f"{d}/igittigitt01", f"{d}/symlink_to_igittigitt01", target_is_directory=True) + matches = self.__parse_gitignore_string(["*.txt"], fake_base_dir=f"{d}/symlink_to_igittigitt01") - self.assertTrue(matches(f"{d}/symlink_to_igittigitt01/file.txt")) - self.assertFalse(matches(f"{d}/symlink_to_igittigitt01/file.png")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches(f"{d}/symlink_to_igittigitt01/file.txt", is_dir=is_dir)) + self.assertFalse(matches(f"{d}/symlink_to_igittigitt01/file.png", is_dir=is_dir)) + + for path in (f"{d}/symlink_to_igittigitt01/file.txt", f"{d}/symlink_to_igittigitt01/file.png"): + with open(path, "w"): + pass - with open(f"{d}/symlink_to_igittigitt01/file.txt", "w"): - pass - with open(f"{d}/symlink_to_igittigitt01/file.png", "w"): - pass self.assertTrue(matches(f"{d}/symlink_to_igittigitt01/file.txt")) self.assertFalse(matches(f"{d}/symlink_to_igittigitt01/file.png")) @@ -256,33 +347,36 @@ def test_robert_parse_rule_files(self): fake_base_dir="/home/robert", ) - self.assertTrue(matches("/home/robert/test__pycache__")) - self.assertTrue(matches("/home/robert/test__pycache__/.test_gitignore")) - self.assertTrue(matches("/home/robert/test__pycache__/excluded")) - self.assertTrue(matches("/home/robert/test__pycache__/excluded/excluded")) - self.assertTrue(matches("/home/robert/test__pycache__/excluded/excluded/excluded.txt")) - self.assertFalse( - matches("/home/robert/test__pycache__/excluded/excluded/test_inverse") - ) # FIXME This file would be actually ignored. :( - self.assertTrue(matches("/home/robert/test__pycache__/some_file.txt")) - self.assertTrue(matches("/home/robert/test__pycache__/test")) - self.assertFalse(matches("/home/robert/.test_gitignore")) - self.assertTrue(matches("/home/robert/.test_venv")) - self.assertTrue(matches("/home/robert/.test_venv/some_file.txt")) - self.assertFalse(matches("/home/robert/not_excluded.txt")) - self.assertFalse(matches("/home/robert/not_excluded")) - self.assertTrue(matches("/home/robert/not_excluded/test__pycache__")) - self.assertFalse(matches("/home/robert/not_excluded/.test_gitignore")) - self.assertFalse(matches("/home/robert/not_excluded/excluded_not")) - self.assertFalse(matches("/home/robert/not_excluded/excluded_not/sub_excluded.txt")) - self.assertFalse(matches("/home/robert/not_excluded/excluded")) - self.assertFalse(matches("/home/robert/not_excluded/excluded/excluded.txt")) - self.assertFalse(matches("/home/robert/not_excluded/not_excluded2.txt")) - self.assertFalse(matches("/home/robert/not_excluded/not_excluded2")) - self.assertFalse(matches("/home/robert/not_excluded/not_excluded2/sub_excluded.txt")) - self.assertFalse(matches("/home/robert/not_excluded/excluded_not.txt")) - self.assertFalse(matches("/home/robert/.test_gitignore_empty")) + for is_dir in (False, True): + with self.subTest(i=is_dir): + self.assertTrue(matches("/home/robert/test__pycache__", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/test__pycache__/.test_gitignore", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/test__pycache__/excluded", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/test__pycache__/excluded/excluded", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/test__pycache__/excluded/excluded/excluded.txt", is_dir=is_dir)) + self.assertFalse( + matches("/home/robert/test__pycache__/excluded/excluded/test_inverse") + ) # FIXME This file would be actually ignored. :( + self.assertTrue(matches("/home/robert/test__pycache__/some_file.txt", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/test__pycache__/test", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/.test_gitignore", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/.test_venv/some_file.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded", is_dir=is_dir)) + self.assertTrue(matches("/home/robert/not_excluded/test__pycache__", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/.test_gitignore", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/excluded_not", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/excluded_not/sub_excluded.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/excluded", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/excluded/excluded.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/not_excluded2.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/not_excluded2", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/not_excluded2/sub_excluded.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/not_excluded/excluded_not.txt", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/.test_gitignore_empty", is_dir=is_dir)) + self.assertFalse(matches("/home/robert/.test_venv", is_dir=False)) + self.assertTrue(matches("/home/robert/.test_venv", is_dir=True)) def __parse_gitignore_string(self, data, fake_base_dir): with unittest.mock.patch("builtins.open", unittest.mock.mock_open(read_data="\n".join(data))): - return gitignorefile.parse(f"{fake_base_dir}/.gitignore", fake_base_dir) + return gitignorefile.parse(f"{fake_base_dir}/.gitignore", base_path=fake_base_dir)