Skip to content

Commit

Permalink
Lots of fixes, performance improvements and refactorings.
Browse files Browse the repository at this point in the history
  • Loading branch information
excitoon committed Aug 27, 2022
1 parent f44b62e commit 6763d47
Show file tree
Hide file tree
Showing 5 changed files with 377 additions and 233 deletions.
175 changes: 106 additions & 69 deletions gitignorefile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,40 @@
import re


def parse(full_path, base_dir=None):
if base_dir is None:
base_dir = os.path.dirname(full_path)
def parse(full_path, base_path=None):
if base_path is None:
base_path = os.path.dirname(full_path) or os.path.dirname(os.path.abspath(full_path))

rules = []
with open(full_path) as ignore_file:
for i, line in enumerate(ignore_file, start=1):
line = line.rstrip("\r\n")
rule = _rule_from_pattern(line, base_path=os.path.abspath(base_dir), source=(full_path, i))
rule = _rule_from_pattern(line, source=(full_path, i))
if rule:
rules.append(rule)

if not any((r.negation for r in rules)):
return lambda file_path: any((r.match(file_path) for r in rules))
# TODO probably combine to single regexp.

else:
# We have negation rules. We can't use a simple "any" to evaluate them.
# Later rules override earlier rules.
return lambda file_path: _handle_negation(file_path, rules)
# We have negation rules. We can't use a simple "any" to evaluate them.
# Later rules override earlier rules.
return lambda file_path, is_dir=None: _handle_negation(file_path, rules, base_path=base_path, is_dir=is_dir)


def ignore():
matches = Cache()
return lambda root, names: {name for name in names if matches(os.path.join(root, name))}


def ignored(path):
return Cache()(path)
def ignored(path, is_dir=None):
return Cache()(path, is_dir=is_dir)


class Cache:
def __init__(self):
self.__gitignores = {}

def __get_parents(self, path):
if not os.path.isdir(path):
def __get_parents(self, path, is_dir):
if not is_dir:
path = os.path.dirname(path)
yield path

Expand All @@ -50,15 +48,18 @@ def __get_parents(self, path):
else:
break

def __call__(self, path):
def __call__(self, path, is_dir=None):
if is_dir is None:
is_dir = os.path.isdir(path)

add_to_children = {}
plain_paths = []
for parent in self.__get_parents(os.path.abspath(path)):
for parent in self.__get_parents(os.path.abspath(path), is_dir=is_dir):
if parent in self.__gitignores:
break

elif os.path.isfile(os.path.join(parent, ".gitignore")):
p = parse(os.path.join(parent, ".gitignore"), base_dir=parent)
p = parse(os.path.join(parent, ".gitignore"), base_path=parent)
add_to_children[parent] = (p, plain_paths)
plain_paths = []

Expand All @@ -83,49 +84,69 @@ def __call__(self, path):
for plain_path in parent_plain_paths:
self.__gitignores[plain_path] = self.__gitignores[parent]

return any((m(path) for m in self.__gitignores[parent])) # This parent comes either from first or second loop.
return any(
(m(path, is_dir=is_dir) for m in self.__gitignores[parent])
) # This parent comes either from first or second loop.


def _handle_negation(file_path, rules, base_path=None, is_dir=None):
"""
Because Git allows for nested `.gitignore` files, a `base_path` value
is required for correct behavior.
"""
return_immediately = not any((r.negation for r in rules))

if is_dir is None:
is_dir = os.path.isdir(file_path)

if base_path is not None:
rel_path = os.path.relpath(file_path, base_path)
else:
rel_path = file_path

if rel_path.startswith(f".{os.sep}"):
rel_path = rel_path[2:]

def _handle_negation(file_path, rules):
matched = False
for rule in rules:
if rule.match(file_path):
if rule.negation:
matched = False
else:
matched = True
return matched
if rule.match(rel_path, is_dir):
matched = not rule.negation
if matched and return_immediately:
return True

else:
return matched


def _rule_from_pattern(pattern, base_path=None, source=None):
def _rule_from_pattern(pattern, source=None):
"""
Take a .gitignore match pattern, such as "*.py[cod]" or "**/*.bak",
and return an _IgnoreRule suitable for matching against files and
Take a `.gitignore` match pattern, such as "*.py[cod]" or "**/*.bak",
and return an `_IgnoreRule` suitable for matching against files and
directories. Patterns which do not match files, such as comments
and blank lines, will return None.
Because git allows for nested .gitignore files, a base_path value
is required for correct behavior. The base path should be absolute.
and blank lines, will return `None`.
"""
if base_path and base_path != os.path.abspath(base_path):
raise ValueError("base_path must be absolute")
# Store the exact pattern for our repr and string functions
orig_pattern = pattern

# Early returns follow
# Discard comments and separators
if not pattern.lstrip() or pattern.lstrip().startswith("#"):
return

# Discard anything with more than two consecutive asterisks
if "***" in pattern:
return

# Strip leading bang before examining double asterisks
if pattern[0] == "!":
if pattern.startswith("!"):
negation = True
pattern = pattern[1:]
else:
negation = False

# Discard anything with invalid double-asterisks -- they can appear
# at the start or the end, or be surrounded by slashes
for m in re.finditer(r"\*\*", pattern):
for m in re.finditer("\\*\\*", pattern):
start_index = m.start()
if (
start_index != 0
Expand All @@ -139,9 +160,11 @@ def _rule_from_pattern(pattern, base_path=None, source=None):
return

directory_only = pattern.endswith("/")
# A slash is a sign that we're tied to the base_path of our rule

# A slash is a sign that we're tied to the `base_path` of our rule
# set.
anchored = "/" in pattern[:-1]

if pattern.startswith("/"):
pattern = pattern[1:]
if pattern.startswith("**"):
Expand All @@ -151,9 +174,11 @@ def _rule_from_pattern(pattern, base_path=None, source=None):
pattern = pattern[1:]
if pattern.endswith("/"):
pattern = pattern[:-1]

# patterns with leading hashes are escaped with a backslash in front, unescape it
if pattern.startswith("\\#"):
pattern = pattern[1:]

# trailing spaces are ignored unless they are escaped with a backslash
i = len(pattern) - 1
striptrailingspaces = True
Expand All @@ -166,27 +191,28 @@ def _rule_from_pattern(pattern, base_path=None, source=None):
if striptrailingspaces:
pattern = pattern[:i]
i -= 1
regex = _fnmatch_pathname_to_regex(pattern, directory_only)

regexp = _fnmatch_pathname_to_regexp(pattern, directory_only)

if anchored:
regex = f"^{regex}"
regexp = f"^{regexp}"

return _IgnoreRule(
pattern=orig_pattern,
regex=regex,
regexp=regexp,
negation=negation,
directory_only=directory_only,
anchored=anchored,
base_path=base_path,
source=source,
)


_IGNORE_RULE_FIELDS = [
"pattern",
"regex", # Basic values
"regexp", # Basic values
"negation",
"directory_only",
"anchored", # Behavior flags
"base_path", # Meaningful for gitignore-style behavior
"source", # (file, line) tuple for reporting
]

Expand All @@ -198,38 +224,39 @@ def __str__(self):
def __repr__(self):
return "".join(["_IgnoreRule('", self.pattern, "')"])

def match(self, abs_path):
matched = False
if self.base_path:
rel_path = str(os.path.relpath(abs_path, self.base_path))
else:
rel_path = str(abs_path)
seps_group, _ = _seps_non_sep_expr()
if rel_path.startswith(f".{seps_group}"):
rel_path = rel_path[2:]
if re.search(self.regex, rel_path):
matched = True
return matched
def match(self, rel_path, is_dir):
match = re.search(self.regexp, rel_path)

# If we need a directory, check there is something after slash and if there is not, target must be a directory.
# If there is something after slash then it's a directory irrelevant to type of target.
# `self.directory_only` implies we have group number 1.
# N.B. Question mark inside a group without a name can shift indices. :(
return match and (not self.directory_only or match.group(1) is not None or is_dir)


def _seps_non_sep_expr():
seps = [re.escape(os.sep)]
if os.altsep is not None:
seps.append(re.escape(os.altsep))
return "[" + "|".join(seps) + "]", "[^{}]".format("|".join(seps))
if os.altsep is None:
seps = re.escape(os.sep)
non_sep = f"[^{re.escape(os.sep)}]"

else:
seps = f"[{re.escape(os.sep)}{re.escape(os.altsep)}]"
non_sep = f"[^{re.escape(os.sep)}{re.escape(os.altsep)}]"

return seps, non_sep


# Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME
# option that `.gitignore`'s behavior depends on.
def _fnmatch_pathname_to_regex(pattern, directory_only):
def _fnmatch_pathname_to_regexp(pattern, directory_only):
"""
Implements fnmatch style-behavior, as though with FNM_PATHNAME flagged;
the path separator will not match shell-style '*' and '.' wildcards.
"""
i, n = 0, len(pattern)

seps_group, nonsep = _seps_non_sep_expr()
res = [f"(^|{seps_group})"]
seps_group, non_sep = _seps_non_sep_expr()
res = [f"(?:^|{seps_group})"] if pattern else [] # Empty name means no path fragment.
while i < n:
c = pattern[i]
i += 1
Expand All @@ -242,13 +269,16 @@ def _fnmatch_pathname_to_regex(pattern, directory_only):
i += 1
res.append(f"{seps_group}?")
else:
res.append(f"{nonsep}*")
res.append(f"{non_sep}*")
except IndexError:
res.append(f"{nonsep}*")
res.append(f"{non_sep}*")

elif c == "?":
res.append(nonsep)
res.append(non_sep)

elif c == "/":
res.append(seps_group)

elif c == "[":
j = i
if j < n and pattern[j] == "!":
Expand All @@ -257,18 +287,25 @@ def _fnmatch_pathname_to_regex(pattern, directory_only):
j += 1
while j < n and pattern[j] != "]":
j += 1

if j >= n:
res.append("\\[")
else:
stuff = pattern[i:j].replace("\\", "\\\\")
i = j + 1
if stuff[0] == "!":
stuff = "".join(["^", stuff[1:]])
stuff = f"^{stuff[1:]}"
elif stuff[0] == "^":
stuff = "".join("\\" + stuff)
res.append("[{}]".format(stuff))
stuff = f"\\{stuff}"
res.append(f"[{stuff}]")

else:
res.append(re.escape(c))
if not directory_only:
res.append(f"({seps_group}|$)")

if directory_only: # In this case we are interested if there is something after slash.
res.append(f"({seps_group}.+)?$")

else:
res.append(f"(?:{seps_group}|$)")

return "".join(res)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
with open(f"{os.path.dirname(os.path.abspath(__file__))}/README.md") as readme:
setuptools.setup(
name="gitignorefile",
version="1.0.4",
version="1.0.5",
description="A spec-compliant `.gitignore` parser for Python",
long_description=readme.read(),
long_description_content_type="text/markdown",
Expand Down
Loading

0 comments on commit 6763d47

Please sign in to comment.