Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make is_sub_path faster #17962

Merged
merged 5 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
get_mypy_comments,
hash_digest,
is_stub_package_file,
is_sub_path,
is_sub_path_normabs,
is_typeshed_file,
module_prefix,
read_py_file,
Expand Down Expand Up @@ -3544,10 +3544,9 @@ def is_silent_import_module(manager: BuildManager, path: str) -> bool:
if manager.options.no_silence_site_packages:
return False
# Silence errors in site-package dirs and typeshed
return any(
is_sub_path(path, dir)
for dir in manager.search_paths.package_path + manager.search_paths.typeshed_path
)
if any(is_sub_path_normabs(path, dir) for dir in manager.search_paths.package_path):
return True
return any(is_sub_path_normabs(path, dir) for dir in manager.search_paths.typeshed_path)


def write_undocumented_ref_info(
Expand Down
9 changes: 7 additions & 2 deletions mypy/modulefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,10 +669,13 @@ def mypy_path() -> list[str]:
def default_lib_path(
data_dir: str, pyversion: tuple[int, int], custom_typeshed_dir: str | None
) -> list[str]:
"""Return default standard library search paths."""
"""Return default standard library search paths. Guaranteed to be normalised."""

data_dir = os.path.abspath(data_dir)
path: list[str] = []

if custom_typeshed_dir:
custom_typeshed_dir = os.path.abspath(custom_typeshed_dir)
typeshed_dir = os.path.join(custom_typeshed_dir, "stdlib")
mypy_extensions_dir = os.path.join(custom_typeshed_dir, "stubs", "mypy-extensions")
versions_file = os.path.join(typeshed_dir, "VERSIONS")
Expand Down Expand Up @@ -712,7 +715,7 @@ def default_lib_path(

@functools.lru_cache(maxsize=None)
def get_search_dirs(python_executable: str | None) -> tuple[list[str], list[str]]:
"""Find package directories for given python.
"""Find package directories for given python. Guaranteed to return absolute paths.

This runs a subprocess call, which generates a list of the directories in sys.path.
To avoid repeatedly calling a subprocess (which can be slow!) we
Expand Down Expand Up @@ -774,6 +777,7 @@ def compute_search_paths(
root_dir = os.getenv("MYPY_TEST_PREFIX", None)
if not root_dir:
root_dir = os.path.dirname(os.path.dirname(__file__))
root_dir = os.path.abspath(root_dir)
lib_path.appendleft(os.path.join(root_dir, "test-data", "unit", "lib-stub"))
# alt_lib_path is used by some tests to bypass the normal lib_path mechanics.
# If we don't have one, grab directories of source files.
Expand Down Expand Up @@ -830,6 +834,7 @@ def compute_search_paths(
return SearchPaths(
python_path=tuple(reversed(python_path)),
mypy_path=tuple(mypypath),
# package_path and typeshed_path must be normalised and absolute via os.path.abspath
package_path=tuple(sys_path + site_packages),
typeshed_path=tuple(lib_path),
)
Expand Down
24 changes: 20 additions & 4 deletions mypy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import hashlib
import io
import os
import pathlib
import re
import shutil
import sys
Expand Down Expand Up @@ -411,9 +410,26 @@ def replace_object_state(
pass


def is_sub_path(path1: str, path2: str) -> bool:
"""Given two paths, return if path1 is a sub-path of path2."""
return pathlib.Path(path2) in pathlib.Path(path1).parents
def is_sub_path_normabs(path: str, dir: str) -> bool:
"""Given two paths, return if path is a sub-path of dir.

Moral equivalent of: Path(dir) in Path(path).parents

Similar to the pathlib version:
- Treats paths case-sensitively
- Does not fully handle unnormalised paths (e.g. paths with "..")
- Does not handle a mix of absolute and relative paths
Unlike the pathlib version:
- Fast
- On Windows, assumes input has been slash normalised
- Handles even fewer unnormalised paths (e.g. paths with "." and "//")

As a result, callers should ensure that inputs have had os.path.abspath called on them
(note that os.path.abspath will normalise)
"""
if not dir.endswith(os.sep):
dir += os.sep
return path.startswith(dir)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this will work reliably on Windows, where path separators can be / or \, and these are largely equivalent? Should we normalize separators on Windows, or can we assume everything is normalized here? I noticed that you added a call to os.path.normcase, so maybe this is already ok.

Maybe update docstring to mention Windows-specific issues, and case insensitivity issues?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Documented the shortcomings of the faster function + invariants we require in modulefinder. Also renamed the function so it sounds a little scarier



if sys.platform == "linux" or sys.platform == "darwin":
Expand Down
Loading