Skip to content

Commit

Permalink
Let mypyc optimise os.path.join (#17949)
Browse files Browse the repository at this point in the history
See #17948

There's one call site which has varargs that I leave as os.path.join, it
doesn't show up on my profile. I do see the `endswith` on the profile,
we could try `path[-1] == '/'` instead (could save a few dozen
milliseconds)

In my work environment, this is about a 10% speedup:
```
λ hyperfine -w 1 -M 3 '/tmp/mypy_primer/timer_mypy_6eddd3ab1/venv/bin/mypy  -c "import torch" --no-incremental --python-executable /opt/oai/bin/python'
Benchmark 1: /tmp/mypy_primer/timer_mypy_6eddd3ab1/venv/bin/mypy  -c "import torch" --no-incremental --python-executable /opt/oai/bin/python
  Time (mean ± σ):     30.842 s ±  0.119 s    [User: 26.383 s, System: 4.396 s]
  Range (min … max):   30.706 s … 30.927 s    3 runs
```
Compared to:
```
λ hyperfine -w 1 -M 3 '/tmp/mypy_primer/timer_mypy_88ae62b4a/venv/bin/mypy  -c "import torch" --no-incremental --python-executable /opt/oai/bin/python'
Benchmark 1: /tmp/mypy_primer/timer_mypy_88ae62b4a/venv/bin/mypy  -c "import torch" --no-incremental --python-executable /opt/oai/bin/python
  Time (mean ± σ):     34.161 s ±  0.163 s    [User: 29.818 s, System: 4.289 s]
  Range (min … max):   34.013 s … 34.336 s    3 runs
```

In the toy "long" environment mentioned in the issue, this is about a 7%
speedup:
```
λ hyperfine -w 1 -M 3 '/tmp/mypy_primer/timer_mypy_6eddd3ab1/venv/bin/mypy  -c "import torch" --no-incremental --python-executable long/bin/python'
Benchmark 1: /tmp/mypy_primer/timer_mypy_6eddd3ab1/venv/bin/mypy  -c "import torch" --no-incremental --python-executable long/bin/python
  Time (mean ± σ):     23.177 s ±  0.317 s    [User: 20.265 s, System: 2.873 s]
  Range (min … max):   22.815 s … 23.407 s    3 runs
```
Compared to:
```
λ hyperfine -w 1 -M 3 '/tmp/mypy_primer/timer_mypy_88ae62b4a/venv/bin/mypy -c "import torch" --python-executable=long/bin/python --no-incremental'
Benchmark 1: /tmp/mypy_primer/timer_mypy_88ae62b4a/venv/bin/mypy -c "import torch" --python-executable=long/bin/python --no-incremental
  Time (mean ± σ):     24.838 s ±  0.237 s    [User: 22.038 s, System: 2.750 s]
  Range (min … max):   24.598 s … 25.073 s    3 runs
```

In the "clean" environment, this is a 1% speedup, but below the noise
floor.
  • Loading branch information
hauntsaninja committed Oct 20, 2024
1 parent 159974c commit e20aaee
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 15 deletions.
31 changes: 16 additions & 15 deletions mypy/modulefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from mypy.nodes import MypyFile
from mypy.options import Options
from mypy.stubinfo import approved_stub_package_exists
from mypy.util import os_path_join


# Paths to be searched in find_module().
Expand Down Expand Up @@ -205,7 +206,7 @@ def find_module_via_source_set(self, id: str) -> ModuleSearchResult | None:
d = os.path.dirname(p)
for _ in range(id.count(".")):
if not any(
self.fscache.isfile(os.path.join(d, "__init__" + x)) for x in PYTHON_EXTENSIONS
self.fscache.isfile(os_path_join(d, "__init__" + x)) for x in PYTHON_EXTENSIONS
):
return None
d = os.path.dirname(d)
Expand Down Expand Up @@ -249,7 +250,7 @@ def find_lib_path_dirs(self, id: str, lib_path: tuple[str, ...]) -> PackageDirs:
dirs = []
for pathitem in self.get_toplevel_possibilities(lib_path, components[0]):
# e.g., '/usr/lib/python3.4/foo/bar'
dir = os.path.normpath(os.path.join(pathitem, dir_chain))
dir = os.path.normpath(os_path_join(pathitem, dir_chain))
if self.fscache.isdir(dir):
dirs.append((dir, True))
return dirs
Expand Down Expand Up @@ -320,8 +321,8 @@ def _find_module_non_stub_helper(
plausible_match = False
dir_path = pkg_dir
for index, component in enumerate(components):
dir_path = os.path.join(dir_path, component)
if self.fscache.isfile(os.path.join(dir_path, "py.typed")):
dir_path = os_path_join(dir_path, component)
if self.fscache.isfile(os_path_join(dir_path, "py.typed")):
return os.path.join(pkg_dir, *components[:-1]), index == 0
elif not plausible_match and (
self.fscache.isdir(dir_path) or self.fscache.isfile(dir_path + ".py")
Expand Down Expand Up @@ -418,9 +419,9 @@ def _find_module(self, id: str, use_typeshed: bool) -> ModuleSearchResult:
# Third-party stub/typed packages
for pkg_dir in self.search_paths.package_path:
stub_name = components[0] + "-stubs"
stub_dir = os.path.join(pkg_dir, stub_name)
stub_dir = os_path_join(pkg_dir, stub_name)
if fscache.isdir(stub_dir):
stub_typed_file = os.path.join(stub_dir, "py.typed")
stub_typed_file = os_path_join(stub_dir, "py.typed")
stub_components = [stub_name] + components[1:]
path = os.path.join(pkg_dir, *stub_components[:-1])
if fscache.isdir(path):
Expand All @@ -430,7 +431,7 @@ def _find_module(self, id: str, use_typeshed: bool) -> ModuleSearchResult:
# Partial here means that mypy should look at the runtime
# package if installed.
if fscache.read(stub_typed_file).decode().strip() == "partial":
runtime_path = os.path.join(pkg_dir, dir_chain)
runtime_path = os_path_join(pkg_dir, dir_chain)
third_party_inline_dirs.append((runtime_path, True))
# if the package is partial, we don't verify the module, as
# the partial stub package may not have a __init__.pyi
Expand Down Expand Up @@ -580,7 +581,7 @@ def find_modules_recursive(self, module: str) -> list[BuildSource]:
# Skip certain names altogether
if name in ("__pycache__", "site-packages", "node_modules") or name.startswith("."):
continue
subpath = os.path.join(package_path, name)
subpath = os_path_join(package_path, name)

if self.options and matches_exclude(
subpath, self.options.exclude, self.fscache, self.options.verbosity >= 2
Expand All @@ -590,8 +591,8 @@ def find_modules_recursive(self, module: str) -> list[BuildSource]:
if self.fscache.isdir(subpath):
# Only recurse into packages
if (self.options and self.options.namespace_packages) or (
self.fscache.isfile(os.path.join(subpath, "__init__.py"))
or self.fscache.isfile(os.path.join(subpath, "__init__.pyi"))
self.fscache.isfile(os_path_join(subpath, "__init__.py"))
or self.fscache.isfile(os_path_join(subpath, "__init__.pyi"))
):
seen.add(name)
sources.extend(self.find_modules_recursive(module + "." + name))
Expand Down Expand Up @@ -636,7 +637,7 @@ def verify_module(fscache: FileSystemCache, id: str, path: str, prefix: str) ->
for i in range(id.count(".")):
path = os.path.dirname(path)
if not any(
fscache.isfile_case(os.path.join(path, f"__init__{extension}"), prefix)
fscache.isfile_case(os_path_join(path, f"__init__{extension}"), prefix)
for extension in PYTHON_EXTENSIONS
):
return False
Expand All @@ -651,7 +652,7 @@ def highest_init_level(fscache: FileSystemCache, id: str, path: str, prefix: str
for i in range(id.count(".")):
path = os.path.dirname(path)
if any(
fscache.isfile_case(os.path.join(path, f"__init__{extension}"), prefix)
fscache.isfile_case(os_path_join(path, f"__init__{extension}"), prefix)
for extension in PYTHON_EXTENSIONS
):
level = i + 1
Expand Down Expand Up @@ -842,11 +843,11 @@ def load_stdlib_py_versions(custom_typeshed_dir: str | None) -> StdlibVersions:
None means there is no maximum version.
"""
typeshed_dir = custom_typeshed_dir or os.path.join(os.path.dirname(__file__), "typeshed")
stdlib_dir = os.path.join(typeshed_dir, "stdlib")
typeshed_dir = custom_typeshed_dir or os_path_join(os.path.dirname(__file__), "typeshed")
stdlib_dir = os_path_join(typeshed_dir, "stdlib")
result = {}

versions_path = os.path.join(stdlib_dir, "VERSIONS")
versions_path = os_path_join(stdlib_dir, "VERSIONS")
assert os.path.isfile(versions_path), (custom_typeshed_dir, versions_path, __file__)
with open(versions_path) as f:
for line in f:
Expand Down
17 changes: 17 additions & 0 deletions mypy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,23 @@ def is_sub_path(path1: str, path2: str) -> bool:
return pathlib.Path(path2) in pathlib.Path(path1).parents


if sys.platform == "linux" or sys.platform == "darwin":

def os_path_join(path: str, b: str) -> str:
# Based off of os.path.join, but simplified to str-only, 2 args and mypyc can compile it.
if b.startswith("/") or not path:
return b
elif path.endswith("/"):
return path + b
else:
return path + "/" + b

else:

def os_path_join(a: str, p: str) -> str:
return os.path.join(a, p)


def hard_exit(status: int = 0) -> None:
"""Kill the current process without fully cleaning up.
Expand Down

0 comments on commit e20aaee

Please sign in to comment.