From 47770a1e91d096fd1c689eb0c78b0f9e76b43639 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 2 May 2023 22:51:18 +0100 Subject: [PATCH] GH-104104: Optimize `pathlib.Path.glob()` by avoiding repeated calls to `os.path.normcase()` (GH-104105) Use `re.IGNORECASE` to implement case-insensitive matching. This restores behaviour from before GH-31691. --- Lib/pathlib.py | 25 +++++++++++-------- ...-05-02-21-05-30.gh-issue-104104.9tjplT.rst | 2 ++ 2 files changed, 16 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-02-21-05-30.gh-issue-104104.9tjplT.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8eb08949fa9b43..61e7f3e4430cae 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -59,6 +59,9 @@ def _is_wildcard_pattern(pat): # be looked up directly as a file. return "*" in pat or "?" in pat or "[" in pat +def _is_case_sensitive(flavour): + return flavour.normcase('Aa') == 'Aa' + # # Globbing helpers # @@ -100,15 +103,14 @@ def select_from(self, parent_path): is_dir = path_cls.is_dir exists = path_cls.exists scandir = path_cls._scandir - normcase = path_cls._flavour.normcase if not is_dir(parent_path): return iter([]) - return self._select_from(parent_path, is_dir, exists, scandir, normcase) + return self._select_from(parent_path, is_dir, exists, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, is_dir, exists, scandir): yield parent_path @@ -118,11 +120,11 @@ def __init__(self, name, child_parts, flavour): self.name = name _Selector.__init__(self, child_parts, flavour) - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, is_dir, exists, scandir): try: path = parent_path._make_child_relpath(self.name) if (is_dir if self.dironly else exists)(path): - for p in self.successor._select_from(path, is_dir, exists, scandir, normcase): + for p in self.successor._select_from(path, is_dir, exists, scandir): yield p except PermissionError: return @@ -131,10 +133,11 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): class _WildcardSelector(_Selector): def __init__(self, pat, child_parts, flavour): - self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch + flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE + self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch _Selector.__init__(self, child_parts, flavour) - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, is_dir, exists, scandir): try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -153,9 +156,9 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): raise continue name = entry.name - if self.match(normcase(name)): + if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, is_dir, exists, scandir, normcase): + for p in self.successor._select_from(path, is_dir, exists, scandir): yield p except PermissionError: return @@ -187,13 +190,13 @@ def _iterate_directories(self, parent_path, is_dir, scandir): except PermissionError: return - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, is_dir, exists, scandir): try: yielded = set() try: successor_select = self.successor._select_from for starting_point in self._iterate_directories(parent_path, is_dir, scandir): - for p in successor_select(starting_point, is_dir, exists, scandir, normcase): + for p in successor_select(starting_point, is_dir, exists, scandir): if p not in yielded: yield p yielded.add(p) diff --git a/Misc/NEWS.d/next/Library/2023-05-02-21-05-30.gh-issue-104104.9tjplT.rst b/Misc/NEWS.d/next/Library/2023-05-02-21-05-30.gh-issue-104104.9tjplT.rst new file mode 100644 index 00000000000000..935a0e2a2bff18 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-02-21-05-30.gh-issue-104104.9tjplT.rst @@ -0,0 +1,2 @@ +Improve performance of :meth:`pathlib.Path.glob` by using +:data:`re.IGNORECASE` to implement case-insensitive matching.