From b80bf22c4d34d3bb1a68d8a44bddc566d2bde4c4 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 31 May 2024 19:18:53 -0400 Subject: [PATCH 01/25] Omit `red-knot` PRs from the changelog (#11666) ## Summary This just ensures that PRs labelled with `red-knot` are automatically filtered out from the auto-generated changelog (which we then manually finalize anyway). --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 167a6c5d60465..bc5d4a99bc763 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,7 +87,7 @@ major_labels = [] # Ruff never uses the major version number minor_labels = ["breaking"] # Bump the minor version on breaking changes version_tag_prefix = "v" -changelog_ignore_labels = ["internal", "ci"] +changelog_ignore_labels = ["internal", "ci", "red-knot"] changelog_sections.breaking = "Breaking changes" changelog_sections.preview = "Preview features" From 99834ee93d4c5476d928fd6864950dcd38e96f77 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 31 May 2024 22:26:20 -0600 Subject: [PATCH 02/25] Sync vendored typeshed stubs (#11668) Close and reopen this PR to trigger CI Co-authored-by: typeshedbot <> --- .../vendor/typeshed/source_commit.txt | 2 +- .../red_knot/vendor/typeshed/stdlib/VERSIONS | 2 +- .../red_knot/vendor/typeshed/stdlib/_ast.pyi | 683 +++++++++++++++++- .../vendor/typeshed/stdlib/_ctypes.pyi | 2 +- .../vendor/typeshed/stdlib/_socket.pyi | 4 +- .../red_knot/vendor/typeshed/stdlib/_stat.pyi | 40 +- .../typeshed/stdlib/_typeshed/__init__.pyi | 2 + .../vendor/typeshed/stdlib/_weakref.pyi | 2 +- .../vendor/typeshed/stdlib/_weakrefset.pyi | 2 +- .../vendor/typeshed/stdlib/argparse.pyi | 264 +++++-- .../red_knot/vendor/typeshed/stdlib/array.pyi | 2 +- .../typeshed/stdlib/asyncio/__init__.pyi | 10 +- .../vendor/typeshed/stdlib/asyncio/events.pyi | 4 +- .../typeshed/stdlib/asyncio/futures.pyi | 2 +- .../vendor/typeshed/stdlib/asyncio/queues.pyi | 2 +- .../vendor/typeshed/stdlib/asyncio/tasks.pyi | 2 +- .../vendor/typeshed/stdlib/atexit.pyi | 4 +- .../vendor/typeshed/stdlib/base64.pyi | 6 + .../vendor/typeshed/stdlib/builtins.pyi | 57 +- .../vendor/typeshed/stdlib/calendar.pyi | 58 +- .../red_knot/vendor/typeshed/stdlib/code.pyi | 35 +- .../stdlib/concurrent/futures/_base.pyi | 2 +- .../stdlib/concurrent/futures/thread.pyi | 2 +- .../vendor/typeshed/stdlib/contextvars.pyi | 4 +- .../red_knot/vendor/typeshed/stdlib/csv.pyi | 8 +- .../typeshed/stdlib/ctypes/__init__.pyi | 2 +- .../vendor/typeshed/stdlib/dataclasses.pyi | 10 +- .../vendor/typeshed/stdlib/datetime.pyi | 32 + .../vendor/typeshed/stdlib/difflib.pyi | 2 +- .../red_knot/vendor/typeshed/stdlib/dis.pyi | 17 +- .../stdlib/distutils/archive_util.pyi | 29 +- .../typeshed/stdlib/distutils/ccompiler.pyi | 53 +- .../vendor/typeshed/stdlib/distutils/cmd.pyi | 71 +- .../stdlib/distutils/command/bdist_msi.pyi | 8 +- .../stdlib/distutils/command/build.pyi | 6 +- .../stdlib/distutils/command/build_py.pyi | 4 +- .../stdlib/distutils/command/check.pyi | 4 +- .../stdlib/distutils/command/config.pyi | 9 +- .../stdlib/distutils/command/install.pyi | 6 +- .../stdlib/distutils/command/register.pyi | 6 +- .../stdlib/distutils/command/sdist.pyi | 6 +- .../vendor/typeshed/stdlib/distutils/core.pyi | 4 +- .../typeshed/stdlib/distutils/dep_util.pyi | 17 +- .../typeshed/stdlib/distutils/dir_util.pyi | 30 +- .../vendor/typeshed/stdlib/distutils/dist.pyi | 13 +- 
.../typeshed/stdlib/distutils/file_util.pyi | 46 +- .../typeshed/stdlib/distutils/filelist.pyi | 14 +- .../typeshed/stdlib/distutils/spawn.pyi | 6 +- .../typeshed/stdlib/distutils/sysconfig.pyi | 6 +- .../typeshed/stdlib/distutils/text_file.pyi | 14 +- .../vendor/typeshed/stdlib/distutils/util.pyi | 14 +- .../vendor/typeshed/stdlib/faulthandler.pyi | 2 +- .../vendor/typeshed/stdlib/filecmp.pyi | 2 +- .../vendor/typeshed/stdlib/fileinput.pyi | 2 +- .../vendor/typeshed/stdlib/functools.pyi | 6 +- .../vendor/typeshed/stdlib/genericpath.pyi | 7 + .../vendor/typeshed/stdlib/graphlib.pyi | 2 +- .../red_knot/vendor/typeshed/stdlib/gzip.pyi | 6 +- .../vendor/typeshed/stdlib/http/__init__.pyi | 15 +- .../vendor/typeshed/stdlib/http/cookies.pyi | 2 +- .../stdlib/importlib/metadata/__init__.pyi | 5 +- .../vendor/typeshed/stdlib/inspect.pyi | 6 + crates/red_knot/vendor/typeshed/stdlib/io.pyi | 2 +- .../vendor/typeshed/stdlib/ipaddress.pyi | 6 +- .../vendor/typeshed/stdlib/itertools.pyi | 58 ++ .../vendor/typeshed/stdlib/json/encoder.pyi | 4 +- .../vendor/typeshed/stdlib/keyword.pyi | 4 +- .../vendor/typeshed/stdlib/locale.pyi | 8 +- .../typeshed/stdlib/logging/__init__.pyi | 109 +-- .../typeshed/stdlib/logging/handlers.pyi | 2 +- .../vendor/typeshed/stdlib/mailbox.pyi | 4 +- .../vendor/typeshed/stdlib/marshal.pyi | 16 +- .../red_knot/vendor/typeshed/stdlib/math.pyi | 3 + .../vendor/typeshed/stdlib/mimetypes.pyi | 9 + .../red_knot/vendor/typeshed/stdlib/mmap.pyi | 2 + .../stdlib/multiprocessing/managers.pyi | 6 +- .../typeshed/stdlib/multiprocessing/pool.pyi | 2 +- .../stdlib/multiprocessing/queues.pyi | 2 +- .../stdlib/multiprocessing/shared_memory.pyi | 8 +- .../vendor/typeshed/stdlib/ntpath.pyi | 9 +- .../vendor/typeshed/stdlib/opcode.pyi | 8 +- .../vendor/typeshed/stdlib/optparse.pyi | 2 +- .../vendor/typeshed/stdlib/os/__init__.pyi | 2 +- .../vendor/typeshed/stdlib/pathlib.pyi | 55 +- .../red_knot/vendor/typeshed/stdlib/pdb.pyi | 4 +- .../vendor/typeshed/stdlib/platform.pyi | 25 + .../vendor/typeshed/stdlib/posixpath.pyi | 5 + .../red_knot/vendor/typeshed/stdlib/pydoc.pyi | 109 ++- .../red_knot/vendor/typeshed/stdlib/queue.pyi | 6 +- .../vendor/typeshed/stdlib/random.pyi | 5 +- crates/red_knot/vendor/typeshed/stdlib/re.pyi | 29 +- .../vendor/typeshed/stdlib/shutil.pyi | 61 +- .../vendor/typeshed/stdlib/signal.pyi | 2 +- .../vendor/typeshed/stdlib/sqlite3/dbapi2.pyi | 6 +- .../red_knot/vendor/typeshed/stdlib/stat.pyi | 6 + .../vendor/typeshed/stdlib/statistics.pyi | 31 +- .../vendor/typeshed/stdlib/subprocess.pyi | 4 +- .../vendor/typeshed/stdlib/sys/__init__.pyi | 21 +- .../vendor/typeshed/stdlib/syslog.pyi | 9 + .../vendor/typeshed/stdlib/tarfile.pyi | 53 +- .../vendor/typeshed/stdlib/tempfile.pyi | 4 +- .../vendor/typeshed/stdlib/threading.pyi | 3 + .../red_knot/vendor/typeshed/stdlib/time.pyi | 5 +- .../red_knot/vendor/typeshed/stdlib/token.pyi | 9 +- .../vendor/typeshed/stdlib/tokenize.pyi | 13 +- .../vendor/typeshed/stdlib/traceback.pyi | 72 +- .../red_knot/vendor/typeshed/stdlib/types.pyi | 13 +- .../vendor/typeshed/stdlib/typing.pyi | 67 +- .../typeshed/stdlib/typing_extensions.pyi | 195 ++--- .../vendor/typeshed/stdlib/unittest/case.pyi | 2 +- .../vendor/typeshed/stdlib/urllib/parse.pyi | 2 +- .../vendor/typeshed/stdlib/urllib/request.pyi | 27 +- .../vendor/typeshed/stdlib/warnings.pyi | 14 +- .../vendor/typeshed/stdlib/wsgiref/util.pyi | 2 + 114 files changed, 2217 insertions(+), 593 deletions(-) diff --git a/crates/red_knot/vendor/typeshed/source_commit.txt 
b/crates/red_knot/vendor/typeshed/source_commit.txt index 7bcdaf4da02e0..2820bb911ff87 100644 --- a/crates/red_knot/vendor/typeshed/source_commit.txt +++ b/crates/red_knot/vendor/typeshed/source_commit.txt @@ -1 +1 @@ -a9d7e861f7a46ae7acd56569326adef302e10f29 +4b6558c12ac43cd40716cd6452fe98a632ae65d7 diff --git a/crates/red_knot/vendor/typeshed/stdlib/VERSIONS b/crates/red_knot/vendor/typeshed/stdlib/VERSIONS index deb940395e1e6..a8526aab9422c 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/VERSIONS +++ b/crates/red_knot/vendor/typeshed/stdlib/VERSIONS @@ -166,7 +166,7 @@ ipaddress: 3.3- itertools: 3.0- json: 3.0- keyword: 3.0- -lib2to3: 3.0- +lib2to3: 3.0-3.12 linecache: 3.0- locale: 3.0- logging: 3.0- diff --git a/crates/red_knot/vendor/typeshed/stdlib/_ast.pyi b/crates/red_knot/vendor/typeshed/stdlib/_ast.pyi index 0758450dfa7cb..51791b4099d5c 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/_ast.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/_ast.pyi @@ -1,29 +1,35 @@ import sys import typing_extensions -from typing import Any, ClassVar, Literal +from typing import Any, ClassVar, Generic, Literal, TypedDict, overload +from typing_extensions import Unpack PyCF_ONLY_AST: Literal[1024] PyCF_TYPE_COMMENTS: Literal[4096] PyCF_ALLOW_TOP_LEVEL_AWAIT: Literal[8192] +# Used for node end positions in constructor keyword arguments +_EndPositionT = typing_extensions.TypeVar("_EndPositionT", int, int | None, default=int | None) # noqa: Y023 + # Alias used for fields that must always be valid identifiers # A string `x` counts as a valid identifier if both the following are True # (1) `x.isidentifier()` evaluates to `True` # (2) `keyword.iskeyword(x)` evaluates to `False` _Identifier: typing_extensions.TypeAlias = str +# Corresponds to the names in the `_attributes` class variable which is non-empty in certain AST nodes +class _Attributes(TypedDict, Generic[_EndPositionT], total=False): + lineno: int + col_offset: int + end_lineno: _EndPositionT + end_col_offset: _EndPositionT + class AST: if sys.version_info >= (3, 10): __match_args__ = () _attributes: ClassVar[tuple[str, ...]] _fields: ClassVar[tuple[str, ...]] - def __init__(self, *args: Any, **kwargs: Any) -> None: ... - # TODO: Not all nodes have all of the following attributes - lineno: int - col_offset: int - end_lineno: int | None - end_col_offset: int | None - type_comment: str | None + if sys.version_info >= (3, 13): + _field_types: ClassVar[dict[str, Any]] class mod(AST): ... class type_ignore(AST): ... @@ -31,31 +37,54 @@ class type_ignore(AST): ... class TypeIgnore(type_ignore): if sys.version_info >= (3, 10): __match_args__ = ("lineno", "tag") + lineno: int tag: str + def __init__(self, lineno: int, tag: str) -> None: ... class FunctionType(mod): if sys.version_info >= (3, 10): __match_args__ = ("argtypes", "returns") argtypes: list[expr] returns: expr + if sys.version_info >= (3, 13): + @overload + def __init__(self, argtypes: list[expr], returns: expr) -> None: ... + @overload + def __init__(self, argtypes: list[expr] = ..., *, returns: expr) -> None: ... + else: + def __init__(self, argtypes: list[expr], returns: expr) -> None: ... class Module(mod): if sys.version_info >= (3, 10): __match_args__ = ("body", "type_ignores") body: list[stmt] type_ignores: list[TypeIgnore] + if sys.version_info >= (3, 13): + def __init__(self, body: list[stmt] = ..., type_ignores: list[TypeIgnore] = ...) -> None: ... + else: + def __init__(self, body: list[stmt], type_ignores: list[TypeIgnore]) -> None: ... 
class Interactive(mod): if sys.version_info >= (3, 10): __match_args__ = ("body",) body: list[stmt] + if sys.version_info >= (3, 13): + def __init__(self, body: list[stmt] = ...) -> None: ... + else: + def __init__(self, body: list[stmt]) -> None: ... class Expression(mod): if sys.version_info >= (3, 10): __match_args__ = ("body",) body: expr + def __init__(self, body: expr) -> None: ... -class stmt(AST): ... +class stmt(AST): + lineno: int + col_offset: int + end_lineno: int | None + end_col_offset: int | None + def __init__(self, **kwargs: Unpack[_Attributes]) -> None: ... class FunctionDef(stmt): if sys.version_info >= (3, 12): @@ -67,8 +96,58 @@ class FunctionDef(stmt): body: list[stmt] decorator_list: list[expr] returns: expr | None + type_comment: str | None if sys.version_info >= (3, 12): type_params: list[type_param] + if sys.version_info >= (3, 13): + def __init__( + self, + name: _Identifier, + args: arguments, + body: list[stmt] = ..., + decorator_list: list[expr] = ..., + returns: expr | None = None, + type_comment: str | None = None, + type_params: list[type_param] = ..., + **kwargs: Unpack[_Attributes], + ) -> None: ... + elif sys.version_info >= (3, 12): + @overload + def __init__( + self, + name: _Identifier, + args: arguments, + body: list[stmt], + decorator_list: list[expr], + returns: expr | None, + type_comment: str | None, + type_params: list[type_param], + **kwargs: Unpack[_Attributes], + ) -> None: ... + @overload + def __init__( + self, + name: _Identifier, + args: arguments, + body: list[stmt], + decorator_list: list[expr], + returns: expr | None = None, + type_comment: str | None = None, + *, + type_params: list[type_param], + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, + name: _Identifier, + args: arguments, + body: list[stmt], + decorator_list: list[expr], + returns: expr | None = None, + type_comment: str | None = None, + **kwargs: Unpack[_Attributes], + ) -> None: ... class AsyncFunctionDef(stmt): if sys.version_info >= (3, 12): @@ -80,8 +159,58 @@ class AsyncFunctionDef(stmt): body: list[stmt] decorator_list: list[expr] returns: expr | None + type_comment: str | None if sys.version_info >= (3, 12): type_params: list[type_param] + if sys.version_info >= (3, 13): + def __init__( + self, + name: _Identifier, + args: arguments, + body: list[stmt] = ..., + decorator_list: list[expr] = ..., + returns: expr | None = None, + type_comment: str | None = None, + type_params: list[type_param] = ..., + **kwargs: Unpack[_Attributes], + ) -> None: ... + elif sys.version_info >= (3, 12): + @overload + def __init__( + self, + name: _Identifier, + args: arguments, + body: list[stmt], + decorator_list: list[expr], + returns: expr | None, + type_comment: str | None, + type_params: list[type_param], + **kwargs: Unpack[_Attributes], + ) -> None: ... + @overload + def __init__( + self, + name: _Identifier, + args: arguments, + body: list[stmt], + decorator_list: list[expr], + returns: expr | None = None, + type_comment: str | None = None, + *, + type_params: list[type_param], + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, + name: _Identifier, + args: arguments, + body: list[stmt], + decorator_list: list[expr], + returns: expr | None = None, + type_comment: str | None = None, + **kwargs: Unpack[_Attributes], + ) -> None: ... 
class ClassDef(stmt): if sys.version_info >= (3, 12): @@ -95,22 +224,73 @@ class ClassDef(stmt): decorator_list: list[expr] if sys.version_info >= (3, 12): type_params: list[type_param] + if sys.version_info >= (3, 13): + def __init__( + self, + name: _Identifier, + bases: list[expr] = ..., + keywords: list[keyword] = ..., + body: list[stmt] = ..., + decorator_list: list[expr] = ..., + type_params: list[type_param] = ..., + **kwargs: Unpack[_Attributes], + ) -> None: ... + elif sys.version_info >= (3, 12): + def __init__( + self, + name: _Identifier, + bases: list[expr], + keywords: list[keyword], + body: list[stmt], + decorator_list: list[expr], + type_params: list[type_param], + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, + name: _Identifier, + bases: list[expr], + keywords: list[keyword], + body: list[stmt], + decorator_list: list[expr], + **kwargs: Unpack[_Attributes], + ) -> None: ... class Return(stmt): if sys.version_info >= (3, 10): __match_args__ = ("value",) value: expr | None + def __init__(self, value: expr | None = None, **kwargs: Unpack[_Attributes]) -> None: ... class Delete(stmt): if sys.version_info >= (3, 10): __match_args__ = ("targets",) targets: list[expr] + if sys.version_info >= (3, 13): + def __init__(self, targets: list[expr] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, targets: list[expr], **kwargs: Unpack[_Attributes]) -> None: ... class Assign(stmt): if sys.version_info >= (3, 10): __match_args__ = ("targets", "value", "type_comment") targets: list[expr] value: expr + type_comment: str | None + if sys.version_info >= (3, 13): + @overload + def __init__( + self, targets: list[expr], value: expr, type_comment: str | None = None, **kwargs: Unpack[_Attributes] + ) -> None: ... + @overload + def __init__( + self, targets: list[expr] = ..., *, value: expr, type_comment: str | None = None, **kwargs: Unpack[_Attributes] + ) -> None: ... + else: + def __init__( + self, targets: list[expr], value: expr, type_comment: str | None = None, **kwargs: Unpack[_Attributes] + ) -> None: ... class AugAssign(stmt): if sys.version_info >= (3, 10): @@ -118,6 +298,9 @@ class AugAssign(stmt): target: Name | Attribute | Subscript op: operator value: expr + def __init__( + self, target: Name | Attribute | Subscript, op: operator, value: expr, **kwargs: Unpack[_Attributes] + ) -> None: ... class AnnAssign(stmt): if sys.version_info >= (3, 10): @@ -126,6 +309,25 @@ class AnnAssign(stmt): annotation: expr value: expr | None simple: int + @overload + def __init__( + self, + target: Name | Attribute | Subscript, + annotation: expr, + value: expr | None, + simple: int, + **kwargs: Unpack[_Attributes], + ) -> None: ... + @overload + def __init__( + self, + target: Name | Attribute | Subscript, + annotation: expr, + value: expr | None = None, + *, + simple: int, + **kwargs: Unpack[_Attributes], + ) -> None: ... class For(stmt): if sys.version_info >= (3, 10): @@ -134,6 +336,27 @@ class For(stmt): iter: expr body: list[stmt] orelse: list[stmt] + type_comment: str | None + if sys.version_info >= (3, 13): + def __init__( + self, + target: expr, + iter: expr, + body: list[stmt] = ..., + orelse: list[stmt] = ..., + type_comment: str | None = None, + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, + target: expr, + iter: expr, + body: list[stmt], + orelse: list[stmt], + type_comment: str | None = None, + **kwargs: Unpack[_Attributes], + ) -> None: ... 
class AsyncFor(stmt): if sys.version_info >= (3, 10): @@ -142,6 +365,27 @@ class AsyncFor(stmt): iter: expr body: list[stmt] orelse: list[stmt] + type_comment: str | None + if sys.version_info >= (3, 13): + def __init__( + self, + target: expr, + iter: expr, + body: list[stmt] = ..., + orelse: list[stmt] = ..., + type_comment: str | None = None, + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, + target: expr, + iter: expr, + body: list[stmt], + orelse: list[stmt], + type_comment: str | None = None, + **kwargs: Unpack[_Attributes], + ) -> None: ... class While(stmt): if sys.version_info >= (3, 10): @@ -149,6 +393,12 @@ class While(stmt): test: expr body: list[stmt] orelse: list[stmt] + if sys.version_info >= (3, 13): + def __init__( + self, test: expr, body: list[stmt] = ..., orelse: list[stmt] = ..., **kwargs: Unpack[_Attributes] + ) -> None: ... + else: + def __init__(self, test: expr, body: list[stmt], orelse: list[stmt], **kwargs: Unpack[_Attributes]) -> None: ... class If(stmt): if sys.version_info >= (3, 10): @@ -156,24 +406,57 @@ class If(stmt): test: expr body: list[stmt] orelse: list[stmt] + if sys.version_info >= (3, 13): + def __init__( + self, test: expr, body: list[stmt] = ..., orelse: list[stmt] = ..., **kwargs: Unpack[_Attributes] + ) -> None: ... + else: + def __init__(self, test: expr, body: list[stmt], orelse: list[stmt], **kwargs: Unpack[_Attributes]) -> None: ... class With(stmt): if sys.version_info >= (3, 10): __match_args__ = ("items", "body", "type_comment") items: list[withitem] body: list[stmt] + type_comment: str | None + if sys.version_info >= (3, 13): + def __init__( + self, + items: list[withitem] = ..., + body: list[stmt] = ..., + type_comment: str | None = None, + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, items: list[withitem], body: list[stmt], type_comment: str | None = None, **kwargs: Unpack[_Attributes] + ) -> None: ... class AsyncWith(stmt): if sys.version_info >= (3, 10): __match_args__ = ("items", "body", "type_comment") items: list[withitem] body: list[stmt] + type_comment: str | None + if sys.version_info >= (3, 13): + def __init__( + self, + items: list[withitem] = ..., + body: list[stmt] = ..., + type_comment: str | None = None, + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, items: list[withitem], body: list[stmt], type_comment: str | None = None, **kwargs: Unpack[_Attributes] + ) -> None: ... class Raise(stmt): if sys.version_info >= (3, 10): __match_args__ = ("exc", "cause") exc: expr | None cause: expr | None + def __init__(self, exc: expr | None = None, cause: expr | None = None, **kwargs: Unpack[_Attributes]) -> None: ... class Try(stmt): if sys.version_info >= (3, 10): @@ -182,6 +465,24 @@ class Try(stmt): handlers: list[ExceptHandler] orelse: list[stmt] finalbody: list[stmt] + if sys.version_info >= (3, 13): + def __init__( + self, + body: list[stmt] = ..., + handlers: list[ExceptHandler] = ..., + orelse: list[stmt] = ..., + finalbody: list[stmt] = ..., + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, + body: list[stmt], + handlers: list[ExceptHandler], + orelse: list[stmt], + finalbody: list[stmt], + **kwargs: Unpack[_Attributes], + ) -> None: ... 
if sys.version_info >= (3, 11): class TryStar(stmt): @@ -190,17 +491,40 @@ if sys.version_info >= (3, 11): handlers: list[ExceptHandler] orelse: list[stmt] finalbody: list[stmt] + if sys.version_info >= (3, 13): + def __init__( + self, + body: list[stmt] = ..., + handlers: list[ExceptHandler] = ..., + orelse: list[stmt] = ..., + finalbody: list[stmt] = ..., + **kwargs: Unpack[_Attributes], + ) -> None: ... + else: + def __init__( + self, + body: list[stmt], + handlers: list[ExceptHandler], + orelse: list[stmt], + finalbody: list[stmt], + **kwargs: Unpack[_Attributes], + ) -> None: ... class Assert(stmt): if sys.version_info >= (3, 10): __match_args__ = ("test", "msg") test: expr msg: expr | None + def __init__(self, test: expr, msg: expr | None = None, **kwargs: Unpack[_Attributes]) -> None: ... class Import(stmt): if sys.version_info >= (3, 10): __match_args__ = ("names",) names: list[alias] + if sys.version_info >= (3, 13): + def __init__(self, names: list[alias] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, names: list[alias], **kwargs: Unpack[_Attributes]) -> None: ... class ImportFrom(stmt): if sys.version_info >= (3, 10): @@ -208,32 +532,65 @@ class ImportFrom(stmt): module: str | None names: list[alias] level: int + if sys.version_info >= (3, 13): + @overload + def __init__(self, module: str | None, names: list[alias], level: int, **kwargs: Unpack[_Attributes]) -> None: ... + @overload + def __init__( + self, module: str | None = None, names: list[alias] = ..., *, level: int, **kwargs: Unpack[_Attributes] + ) -> None: ... + else: + @overload + def __init__(self, module: str | None, names: list[alias], level: int, **kwargs: Unpack[_Attributes]) -> None: ... + @overload + def __init__( + self, module: str | None = None, *, names: list[alias], level: int, **kwargs: Unpack[_Attributes] + ) -> None: ... class Global(stmt): if sys.version_info >= (3, 10): __match_args__ = ("names",) names: list[_Identifier] + if sys.version_info >= (3, 13): + def __init__(self, names: list[_Identifier] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, names: list[_Identifier], **kwargs: Unpack[_Attributes]) -> None: ... class Nonlocal(stmt): if sys.version_info >= (3, 10): __match_args__ = ("names",) names: list[_Identifier] + if sys.version_info >= (3, 13): + def __init__(self, names: list[_Identifier] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, names: list[_Identifier], **kwargs: Unpack[_Attributes]) -> None: ... class Expr(stmt): if sys.version_info >= (3, 10): __match_args__ = ("value",) value: expr + def __init__(self, value: expr, **kwargs: Unpack[_Attributes]) -> None: ... class Pass(stmt): ... class Break(stmt): ... class Continue(stmt): ... -class expr(AST): ... + +class expr(AST): + lineno: int + col_offset: int + end_lineno: int | None + end_col_offset: int | None + def __init__(self, **kwargs: Unpack[_Attributes]) -> None: ... class BoolOp(expr): if sys.version_info >= (3, 10): __match_args__ = ("op", "values") op: boolop values: list[expr] + if sys.version_info >= (3, 13): + def __init__(self, op: boolop, values: list[expr] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, op: boolop, values: list[expr], **kwargs: Unpack[_Attributes]) -> None: ... 
class BinOp(expr): if sys.version_info >= (3, 10): @@ -241,18 +598,21 @@ class BinOp(expr): left: expr op: operator right: expr + def __init__(self, left: expr, op: operator, right: expr, **kwargs: Unpack[_Attributes]) -> None: ... class UnaryOp(expr): if sys.version_info >= (3, 10): __match_args__ = ("op", "operand") op: unaryop operand: expr + def __init__(self, op: unaryop, operand: expr, **kwargs: Unpack[_Attributes]) -> None: ... class Lambda(expr): if sys.version_info >= (3, 10): __match_args__ = ("args", "body") args: arguments body: expr + def __init__(self, args: arguments, body: expr, **kwargs: Unpack[_Attributes]) -> None: ... class IfExp(expr): if sys.version_info >= (3, 10): @@ -260,29 +620,46 @@ class IfExp(expr): test: expr body: expr orelse: expr + def __init__(self, test: expr, body: expr, orelse: expr, **kwargs: Unpack[_Attributes]) -> None: ... class Dict(expr): if sys.version_info >= (3, 10): __match_args__ = ("keys", "values") keys: list[expr | None] values: list[expr] + if sys.version_info >= (3, 13): + def __init__(self, keys: list[expr | None] = ..., values: list[expr] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, keys: list[expr | None], values: list[expr], **kwargs: Unpack[_Attributes]) -> None: ... class Set(expr): if sys.version_info >= (3, 10): __match_args__ = ("elts",) elts: list[expr] + if sys.version_info >= (3, 13): + def __init__(self, elts: list[expr] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, elts: list[expr], **kwargs: Unpack[_Attributes]) -> None: ... class ListComp(expr): if sys.version_info >= (3, 10): __match_args__ = ("elt", "generators") elt: expr generators: list[comprehension] + if sys.version_info >= (3, 13): + def __init__(self, elt: expr, generators: list[comprehension] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, elt: expr, generators: list[comprehension], **kwargs: Unpack[_Attributes]) -> None: ... class SetComp(expr): if sys.version_info >= (3, 10): __match_args__ = ("elt", "generators") elt: expr generators: list[comprehension] + if sys.version_info >= (3, 13): + def __init__(self, elt: expr, generators: list[comprehension] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, elt: expr, generators: list[comprehension], **kwargs: Unpack[_Attributes]) -> None: ... class DictComp(expr): if sys.version_info >= (3, 10): @@ -290,27 +667,40 @@ class DictComp(expr): key: expr value: expr generators: list[comprehension] + if sys.version_info >= (3, 13): + def __init__( + self, key: expr, value: expr, generators: list[comprehension] = ..., **kwargs: Unpack[_Attributes] + ) -> None: ... + else: + def __init__(self, key: expr, value: expr, generators: list[comprehension], **kwargs: Unpack[_Attributes]) -> None: ... class GeneratorExp(expr): if sys.version_info >= (3, 10): __match_args__ = ("elt", "generators") elt: expr generators: list[comprehension] + if sys.version_info >= (3, 13): + def __init__(self, elt: expr, generators: list[comprehension] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, elt: expr, generators: list[comprehension], **kwargs: Unpack[_Attributes]) -> None: ... class Await(expr): if sys.version_info >= (3, 10): __match_args__ = ("value",) value: expr + def __init__(self, value: expr, **kwargs: Unpack[_Attributes]) -> None: ... 
class Yield(expr): if sys.version_info >= (3, 10): __match_args__ = ("value",) value: expr | None + def __init__(self, value: expr | None = None, **kwargs: Unpack[_Attributes]) -> None: ... class YieldFrom(expr): if sys.version_info >= (3, 10): __match_args__ = ("value",) value: expr + def __init__(self, value: expr, **kwargs: Unpack[_Attributes]) -> None: ... class Compare(expr): if sys.version_info >= (3, 10): @@ -318,6 +708,12 @@ class Compare(expr): left: expr ops: list[cmpop] comparators: list[expr] + if sys.version_info >= (3, 13): + def __init__( + self, left: expr, ops: list[cmpop] = ..., comparators: list[expr] = ..., **kwargs: Unpack[_Attributes] + ) -> None: ... + else: + def __init__(self, left: expr, ops: list[cmpop], comparators: list[expr], **kwargs: Unpack[_Attributes]) -> None: ... class Call(expr): if sys.version_info >= (3, 10): @@ -325,6 +721,12 @@ class Call(expr): func: expr args: list[expr] keywords: list[keyword] + if sys.version_info >= (3, 13): + def __init__( + self, func: expr, args: list[expr] = ..., keywords: list[keyword] = ..., **kwargs: Unpack[_Attributes] + ) -> None: ... + else: + def __init__(self, func: expr, args: list[expr], keywords: list[keyword], **kwargs: Unpack[_Attributes]) -> None: ... class FormattedValue(expr): if sys.version_info >= (3, 10): @@ -332,11 +734,16 @@ class FormattedValue(expr): value: expr conversion: int format_spec: expr | None + def __init__(self, value: expr, conversion: int, format_spec: expr | None = None, **kwargs: Unpack[_Attributes]) -> None: ... class JoinedStr(expr): if sys.version_info >= (3, 10): __match_args__ = ("values",) values: list[expr] + if sys.version_info >= (3, 13): + def __init__(self, values: list[expr] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, values: list[expr], **kwargs: Unpack[_Attributes]) -> None: ... class Constant(expr): if sys.version_info >= (3, 10): @@ -346,72 +753,94 @@ class Constant(expr): # Aliases for value, for backwards compatibility s: Any n: int | float | complex + def __init__(self, value: Any, kind: str | None = None, **kwargs: Unpack[_Attributes]) -> None: ... class NamedExpr(expr): if sys.version_info >= (3, 10): __match_args__ = ("target", "value") target: Name value: expr + def __init__(self, target: Name, value: expr, **kwargs: Unpack[_Attributes]) -> None: ... class Attribute(expr): if sys.version_info >= (3, 10): __match_args__ = ("value", "attr", "ctx") value: expr attr: _Identifier - ctx: expr_context + ctx: expr_context # Not present in Python < 3.13 if not passed to `__init__` + def __init__(self, value: expr, attr: _Identifier, ctx: expr_context = ..., **kwargs: Unpack[_Attributes]) -> None: ... if sys.version_info >= (3, 9): _Slice: typing_extensions.TypeAlias = expr + _SliceAttributes: typing_extensions.TypeAlias = _Attributes else: class slice(AST): ... _Slice: typing_extensions.TypeAlias = slice + class _SliceAttributes(TypedDict): ... + class Slice(_Slice): if sys.version_info >= (3, 10): __match_args__ = ("lower", "upper", "step") lower: expr | None upper: expr | None step: expr | None + def __init__( + self, lower: expr | None = None, upper: expr | None = None, step: expr | None = None, **kwargs: Unpack[_SliceAttributes] + ) -> None: ... if sys.version_info < (3, 9): class ExtSlice(slice): dims: list[slice] + def __init__(self, dims: list[slice], **kwargs: Unpack[_SliceAttributes]) -> None: ... class Index(slice): value: expr + def __init__(self, value: expr, **kwargs: Unpack[_SliceAttributes]) -> None: ... 
class Subscript(expr): if sys.version_info >= (3, 10): __match_args__ = ("value", "slice", "ctx") value: expr slice: _Slice - ctx: expr_context + ctx: expr_context # Not present in Python < 3.13 if not passed to `__init__` + def __init__(self, value: expr, slice: _Slice, ctx: expr_context = ..., **kwargs: Unpack[_Attributes]) -> None: ... class Starred(expr): if sys.version_info >= (3, 10): __match_args__ = ("value", "ctx") value: expr - ctx: expr_context + ctx: expr_context # Not present in Python < 3.13 if not passed to `__init__` + def __init__(self, value: expr, ctx: expr_context = ..., **kwargs: Unpack[_Attributes]) -> None: ... class Name(expr): if sys.version_info >= (3, 10): __match_args__ = ("id", "ctx") id: _Identifier - ctx: expr_context + ctx: expr_context # Not present in Python < 3.13 if not passed to `__init__` + def __init__(self, id: _Identifier, ctx: expr_context = ..., **kwargs: Unpack[_Attributes]) -> None: ... class List(expr): if sys.version_info >= (3, 10): __match_args__ = ("elts", "ctx") elts: list[expr] - ctx: expr_context + ctx: expr_context # Not present in Python < 3.13 if not passed to `__init__` + if sys.version_info >= (3, 13): + def __init__(self, elts: list[expr] = ..., ctx: expr_context = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, elts: list[expr], ctx: expr_context = ..., **kwargs: Unpack[_Attributes]) -> None: ... class Tuple(expr): if sys.version_info >= (3, 10): __match_args__ = ("elts", "ctx") elts: list[expr] - ctx: expr_context + ctx: expr_context # Not present in Python < 3.13 if not passed to `__init__` if sys.version_info >= (3, 9): dims: list[expr] + if sys.version_info >= (3, 13): + def __init__(self, elts: list[expr] = ..., ctx: expr_context = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, elts: list[expr], ctx: expr_context = ..., **kwargs: Unpack[_Attributes]) -> None: ... class expr_context(AST): ... @@ -422,6 +851,7 @@ if sys.version_info < (3, 9): class Suite(mod): body: list[stmt] + def __init__(self, body: list[stmt]) -> None: ... class Del(expr_context): ... class Load(expr_context): ... @@ -467,8 +897,20 @@ class comprehension(AST): iter: expr ifs: list[expr] is_async: int - -class excepthandler(AST): ... + if sys.version_info >= (3, 13): + @overload + def __init__(self, target: expr, iter: expr, ifs: list[expr], is_async: int) -> None: ... + @overload + def __init__(self, target: expr, iter: expr, ifs: list[expr] = ..., *, is_async: int) -> None: ... + else: + def __init__(self, target: expr, iter: expr, ifs: list[expr], is_async: int) -> None: ... + +class excepthandler(AST): + lineno: int + col_offset: int + end_lineno: int | None + end_col_offset: int | None + def __init__(self, **kwargs: Unpack[_Attributes]) -> None: ... class ExceptHandler(excepthandler): if sys.version_info >= (3, 10): @@ -476,6 +918,19 @@ class ExceptHandler(excepthandler): type: expr | None name: _Identifier | None body: list[stmt] + if sys.version_info >= (3, 13): + def __init__( + self, type: expr | None = None, name: _Identifier | None = None, body: list[stmt] = ..., **kwargs: Unpack[_Attributes] + ) -> None: ... + else: + @overload + def __init__( + self, type: expr | None, name: _Identifier | None, body: list[stmt], **kwargs: Unpack[_Attributes] + ) -> None: ... + @overload + def __init__( + self, type: expr | None = None, name: _Identifier | None = None, *, body: list[stmt], **kwargs: Unpack[_Attributes] + ) -> None: ... 
class arguments(AST): if sys.version_info >= (3, 10): @@ -487,38 +942,117 @@ class arguments(AST): kw_defaults: list[expr | None] kwarg: arg | None defaults: list[expr] + if sys.version_info >= (3, 13): + def __init__( + self, + posonlyargs: list[arg] = ..., + args: list[arg] = ..., + vararg: arg | None = None, + kwonlyargs: list[arg] = ..., + kw_defaults: list[expr | None] = ..., + kwarg: arg | None = None, + defaults: list[expr] = ..., + ) -> None: ... + else: + @overload + def __init__( + self, + posonlyargs: list[arg], + args: list[arg], + vararg: arg | None, + kwonlyargs: list[arg], + kw_defaults: list[expr | None], + kwarg: arg | None, + defaults: list[expr], + ) -> None: ... + @overload + def __init__( + self, + posonlyargs: list[arg], + args: list[arg], + vararg: arg | None, + kwonlyargs: list[arg], + kw_defaults: list[expr | None], + kwarg: arg | None = None, + *, + defaults: list[expr], + ) -> None: ... + @overload + def __init__( + self, + posonlyargs: list[arg], + args: list[arg], + vararg: arg | None = None, + *, + kwonlyargs: list[arg], + kw_defaults: list[expr | None], + kwarg: arg | None = None, + defaults: list[expr], + ) -> None: ... class arg(AST): + lineno: int + col_offset: int + end_lineno: int | None + end_col_offset: int | None if sys.version_info >= (3, 10): __match_args__ = ("arg", "annotation", "type_comment") arg: _Identifier annotation: expr | None + type_comment: str | None + def __init__( + self, arg: _Identifier, annotation: expr | None = None, type_comment: str | None = None, **kwargs: Unpack[_Attributes] + ) -> None: ... class keyword(AST): + lineno: int + col_offset: int + end_lineno: int | None + end_col_offset: int | None if sys.version_info >= (3, 10): __match_args__ = ("arg", "value") arg: _Identifier | None value: expr + @overload + def __init__(self, arg: _Identifier | None, value: expr, **kwargs: Unpack[_Attributes]) -> None: ... + @overload + def __init__(self, arg: _Identifier | None = None, *, value: expr, **kwargs: Unpack[_Attributes]) -> None: ... class alias(AST): + lineno: int + col_offset: int + end_lineno: int | None + end_col_offset: int | None if sys.version_info >= (3, 10): __match_args__ = ("name", "asname") name: str asname: _Identifier | None + def __init__(self, name: str, asname: _Identifier | None = None, **kwargs: Unpack[_Attributes]) -> None: ... class withitem(AST): if sys.version_info >= (3, 10): __match_args__ = ("context_expr", "optional_vars") context_expr: expr optional_vars: expr | None + def __init__(self, context_expr: expr, optional_vars: expr | None = None) -> None: ... if sys.version_info >= (3, 10): class Match(stmt): __match_args__ = ("subject", "cases") subject: expr cases: list[match_case] + if sys.version_info >= (3, 13): + def __init__(self, subject: expr, cases: list[match_case] = ..., **kwargs: Unpack[_Attributes]) -> None: ... + else: + def __init__(self, subject: expr, cases: list[match_case], **kwargs: Unpack[_Attributes]) -> None: ... + + class pattern(AST): + lineno: int + col_offset: int + end_lineno: int + end_col_offset: int + def __init__(self, **kwargs: Unpack[_Attributes[int]]) -> None: ... - class pattern(AST): ... # Without the alias, Pyright complains variables named pattern are recursively defined _Pattern: typing_extensions.TypeAlias = pattern @@ -527,28 +1061,58 @@ if sys.version_info >= (3, 10): pattern: _Pattern guard: expr | None body: list[stmt] + if sys.version_info >= (3, 13): + def __init__(self, pattern: _Pattern, guard: expr | None = None, body: list[stmt] = ...) -> None: ... 
+ else: + @overload + def __init__(self, pattern: _Pattern, guard: expr | None, body: list[stmt]) -> None: ... + @overload + def __init__(self, pattern: _Pattern, guard: expr | None = None, *, body: list[stmt]) -> None: ... class MatchValue(pattern): __match_args__ = ("value",) value: expr + def __init__(self, value: expr, **kwargs: Unpack[_Attributes[int]]) -> None: ... class MatchSingleton(pattern): __match_args__ = ("value",) value: Literal[True, False] | None + def __init__(self, value: Literal[True, False] | None, **kwargs: Unpack[_Attributes[int]]) -> None: ... class MatchSequence(pattern): __match_args__ = ("patterns",) patterns: list[pattern] + if sys.version_info >= (3, 13): + def __init__(self, patterns: list[pattern] = ..., **kwargs: Unpack[_Attributes[int]]) -> None: ... + else: + def __init__(self, patterns: list[pattern], **kwargs: Unpack[_Attributes[int]]) -> None: ... class MatchStar(pattern): __match_args__ = ("name",) name: _Identifier | None + def __init__(self, name: _Identifier | None, **kwargs: Unpack[_Attributes[int]]) -> None: ... class MatchMapping(pattern): __match_args__ = ("keys", "patterns", "rest") keys: list[expr] patterns: list[pattern] rest: _Identifier | None + if sys.version_info >= (3, 13): + def __init__( + self, + keys: list[expr] = ..., + patterns: list[pattern] = ..., + rest: _Identifier | None = None, + **kwargs: Unpack[_Attributes[int]], + ) -> None: ... + else: + def __init__( + self, + keys: list[expr], + patterns: list[pattern], + rest: _Identifier | None = None, + **kwargs: Unpack[_Attributes[int]], + ) -> None: ... class MatchClass(pattern): __match_args__ = ("cls", "patterns", "kwd_attrs", "kwd_patterns") @@ -556,36 +1120,111 @@ if sys.version_info >= (3, 10): patterns: list[pattern] kwd_attrs: list[_Identifier] kwd_patterns: list[pattern] + if sys.version_info >= (3, 13): + def __init__( + self, + cls: expr, + patterns: list[pattern] = ..., + kwd_attrs: list[_Identifier] = ..., + kwd_patterns: list[pattern] = ..., + **kwargs: Unpack[_Attributes[int]], + ) -> None: ... + else: + def __init__( + self, + cls: expr, + patterns: list[pattern], + kwd_attrs: list[_Identifier], + kwd_patterns: list[pattern], + **kwargs: Unpack[_Attributes[int]], + ) -> None: ... class MatchAs(pattern): __match_args__ = ("pattern", "name") pattern: _Pattern | None name: _Identifier | None + def __init__( + self, pattern: _Pattern | None = None, name: _Identifier | None = None, **kwargs: Unpack[_Attributes[int]] + ) -> None: ... class MatchOr(pattern): __match_args__ = ("patterns",) patterns: list[pattern] + if sys.version_info >= (3, 13): + def __init__(self, patterns: list[pattern] = ..., **kwargs: Unpack[_Attributes[int]]) -> None: ... + else: + def __init__(self, patterns: list[pattern], **kwargs: Unpack[_Attributes[int]]) -> None: ... if sys.version_info >= (3, 12): class type_param(AST): + lineno: int + col_offset: int end_lineno: int end_col_offset: int + def __init__(self, **kwargs: Unpack[_Attributes[int]]) -> None: ... class TypeVar(type_param): - __match_args__ = ("name", "bound") + if sys.version_info >= (3, 13): + __match_args__ = ("name", "bound", "default_value") + else: + __match_args__ = ("name", "bound") name: _Identifier bound: expr | None + if sys.version_info >= (3, 13): + default_value: expr | None + def __init__( + self, + name: _Identifier, + bound: expr | None = None, + default_value: expr | None = None, + **kwargs: Unpack[_Attributes[int]], + ) -> None: ... 
+ else: + def __init__(self, name: _Identifier, bound: expr | None = None, **kwargs: Unpack[_Attributes[int]]) -> None: ... class ParamSpec(type_param): - __match_args__ = ("name",) + if sys.version_info >= (3, 13): + __match_args__ = ("name", "default_value") + else: + __match_args__ = ("name",) name: _Identifier + if sys.version_info >= (3, 13): + default_value: expr | None + def __init__( + self, name: _Identifier, default_value: expr | None = None, **kwargs: Unpack[_Attributes[int]] + ) -> None: ... + else: + def __init__(self, name: _Identifier, **kwargs: Unpack[_Attributes[int]]) -> None: ... class TypeVarTuple(type_param): - __match_args__ = ("name",) + if sys.version_info >= (3, 13): + __match_args__ = ("name", "default_value") + else: + __match_args__ = ("name",) name: _Identifier + if sys.version_info >= (3, 13): + default_value: expr | None + def __init__( + self, name: _Identifier, default_value: expr | None = None, **kwargs: Unpack[_Attributes[int]] + ) -> None: ... + else: + def __init__(self, name: _Identifier, **kwargs: Unpack[_Attributes[int]]) -> None: ... class TypeAlias(stmt): __match_args__ = ("name", "type_params", "value") name: Name type_params: list[type_param] value: expr + if sys.version_info >= (3, 13): + @overload + def __init__( + self, name: Name, type_params: list[type_param], value: expr, **kwargs: Unpack[_Attributes[int]] + ) -> None: ... + @overload + def __init__( + self, name: Name, type_params: list[type_param] = ..., *, value: expr, **kwargs: Unpack[_Attributes[int]] + ) -> None: ... + else: + def __init__( + self, name: Name, type_params: list[type_param], value: expr, **kwargs: Unpack[_Attributes[int]] + ) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/_ctypes.pyi b/crates/red_knot/vendor/typeshed/stdlib/_ctypes.pyi index 60bbc51d9411f..4a944bd7dddc6 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/_ctypes.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/_ctypes.pyi @@ -201,7 +201,7 @@ class Array(_CData, Generic[_CT]): # Sized and _CData prevents using _CDataMeta. def __len__(self) -> int: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... def addressof(obj: _CData) -> int: ... def alignment(obj_or_type: _CData | type[_CData]) -> int: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/_socket.pyi b/crates/red_knot/vendor/typeshed/stdlib/_socket.pyi index 2a48349d4f7db..affa8d63ecfab 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/_socket.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/_socket.pyi @@ -783,7 +783,7 @@ def ntohl(x: int, /) -> int: ... # param & ret val are 32-bit ints def ntohs(x: int, /) -> int: ... # param & ret val are 16-bit ints def htonl(x: int, /) -> int: ... # param & ret val are 32-bit ints def htons(x: int, /) -> int: ... # param & ret val are 16-bit ints -def inet_aton(ip_string: str, /) -> bytes: ... # ret val 4 bytes in length +def inet_aton(ip_addr: str, /) -> bytes: ... # ret val 4 bytes in length def inet_ntoa(packed_ip: ReadableBuffer, /) -> str: ... def inet_pton(address_family: int, ip_string: str, /) -> bytes: ... def inet_ntop(address_family: int, packed_ip: ReadableBuffer, /) -> str: ... @@ -797,7 +797,7 @@ if sys.platform != "win32": def socketpair(family: int = ..., type: int = ..., proto: int = ..., /) -> tuple[socket, socket]: ... def if_nameindex() -> list[tuple[int, str]]: ... -def if_nametoindex(name: str, /) -> int: ... +def if_nametoindex(oname: str, /) -> int: ... 
def if_indextoname(index: int, /) -> str: ... CAPI: object diff --git a/crates/red_knot/vendor/typeshed/stdlib/_stat.pyi b/crates/red_knot/vendor/typeshed/stdlib/_stat.pyi index 347897404edcc..c4e918d8b57f6 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/_stat.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/_stat.pyi @@ -64,19 +64,19 @@ UF_NODUMP: Literal[0x00000001] UF_NOUNLINK: Literal[0x00000010] UF_OPAQUE: Literal[0x00000008] -def S_IMODE(mode: int) -> int: ... -def S_IFMT(mode: int) -> int: ... -def S_ISBLK(mode: int) -> bool: ... -def S_ISCHR(mode: int) -> bool: ... -def S_ISDIR(mode: int) -> bool: ... -def S_ISDOOR(mode: int) -> bool: ... -def S_ISFIFO(mode: int) -> bool: ... -def S_ISLNK(mode: int) -> bool: ... -def S_ISPORT(mode: int) -> bool: ... -def S_ISREG(mode: int) -> bool: ... -def S_ISSOCK(mode: int) -> bool: ... -def S_ISWHT(mode: int) -> bool: ... -def filemode(mode: int) -> str: ... +def S_IMODE(mode: int, /) -> int: ... +def S_IFMT(mode: int, /) -> int: ... +def S_ISBLK(mode: int, /) -> bool: ... +def S_ISCHR(mode: int, /) -> bool: ... +def S_ISDIR(mode: int, /) -> bool: ... +def S_ISDOOR(mode: int, /) -> bool: ... +def S_ISFIFO(mode: int, /) -> bool: ... +def S_ISLNK(mode: int, /) -> bool: ... +def S_ISPORT(mode: int, /) -> bool: ... +def S_ISREG(mode: int, /) -> bool: ... +def S_ISSOCK(mode: int, /) -> bool: ... +def S_ISWHT(mode: int, /) -> bool: ... +def filemode(mode: int, /) -> str: ... if sys.platform == "win32": IO_REPARSE_TAG_SYMLINK: int @@ -101,3 +101,17 @@ if sys.platform == "win32": FILE_ATTRIBUTE_SYSTEM: Literal[4] FILE_ATTRIBUTE_TEMPORARY: Literal[256] FILE_ATTRIBUTE_VIRTUAL: Literal[65536] + +if sys.version_info >= (3, 13): + SF_SETTABLE: Literal[0x3FFF0000] + # https://github.com/python/cpython/issues/114081#issuecomment-2119017790 + # SF_RESTRICTED: Literal[0x00080000] + SF_FIRMLINK: Literal[0x00800000] + SF_DATALESS: Literal[0x40000000] + + SF_SUPPORTED: Literal[0x9F0000] + SF_SYNTHETIC: Literal[0xC0000000] + + UF_TRACKED: Literal[0x00000040] + UF_DATAVAULT: Literal[0x00000080] + UF_SETTABLE: Literal[0x0000FFFF] diff --git a/crates/red_knot/vendor/typeshed/stdlib/_typeshed/__init__.pyi b/crates/red_knot/vendor/typeshed/stdlib/_typeshed/__init__.pyi index 6937d97b87eac..7201819b25ed9 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/_typeshed/__init__.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/_typeshed/__init__.pyi @@ -326,6 +326,8 @@ class structseq(Generic[_T_co]): # but only has any meaning if you supply it a dict where the keys are strings. # https://github.com/python/typeshed/pull/6560#discussion_r767149830 def __new__(cls: type[Self], sequence: Iterable[_T_co], dict: dict[str, Any] = ...) -> Self: ... + if sys.version_info >= (3, 13): + def __replace__(self: Self, **kwargs: Any) -> Self: ... # Superset of typing.AnyStr that also includes LiteralString AnyOrLiteralStr = TypeVar("AnyOrLiteralStr", str, bytes, LiteralString) # noqa: Y001 diff --git a/crates/red_knot/vendor/typeshed/stdlib/_weakref.pyi b/crates/red_knot/vendor/typeshed/stdlib/_weakref.pyi index e395143cc027d..61365645d768a 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/_weakref.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/_weakref.pyi @@ -27,7 +27,7 @@ class ReferenceType(Generic[_T]): def __eq__(self, value: object, /) -> bool: ... def __hash__(self) -> int: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... 
ref = ReferenceType diff --git a/crates/red_knot/vendor/typeshed/stdlib/_weakrefset.pyi b/crates/red_knot/vendor/typeshed/stdlib/_weakrefset.pyi index 6482ade1271e3..2a4e682f64ed7 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/_weakrefset.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/_weakrefset.pyi @@ -48,4 +48,4 @@ class WeakSet(MutableSet[_T]): def __or__(self, other: Iterable[_S]) -> WeakSet[_S | _T]: ... def isdisjoint(self, other: Iterable[_T]) -> bool: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/argparse.pyi b/crates/red_knot/vendor/typeshed/stdlib/argparse.pyi index 0701654734a40..1956d08c9933e 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/argparse.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/argparse.pyi @@ -318,51 +318,95 @@ class Action(_AttributeHolder): required: bool help: str | None metavar: str | tuple[str, ...] | None - def __init__( - self, - option_strings: Sequence[str], - dest: str, - nargs: int | str | None = None, - const: _T | None = None, - default: _T | str | None = None, - type: Callable[[str], _T] | FileType | None = None, - choices: Iterable[_T] | None = None, - required: bool = False, - help: str | None = None, - metavar: str | tuple[str, ...] | None = None, - ) -> None: ... - def __call__( - self, parser: ArgumentParser, namespace: Namespace, values: str | Sequence[Any] | None, option_string: str | None = None - ) -> None: ... - if sys.version_info >= (3, 9): - def format_usage(self) -> str: ... - -if sys.version_info >= (3, 12): - class BooleanOptionalAction(Action): - @overload + if sys.version_info >= (3, 13): def __init__( self, option_strings: Sequence[str], dest: str, - default: bool | None = None, - *, + nargs: int | str | None = None, + const: _T | None = None, + default: _T | str | None = None, + type: Callable[[str], _T] | FileType | None = None, + choices: Iterable[_T] | None = None, required: bool = False, help: str | None = None, + metavar: str | tuple[str, ...] | None = None, + deprecated: bool = False, ) -> None: ... - @overload - @deprecated("The `type`, `choices`, and `metavar` parameters are ignored and will be removed in Python 3.14.") + else: def __init__( self, option_strings: Sequence[str], dest: str, - default: _T | bool | None = None, - type: Callable[[str], _T] | FileType | None = sentinel, - choices: Iterable[_T] | None = sentinel, + nargs: int | str | None = None, + const: _T | None = None, + default: _T | str | None = None, + type: Callable[[str], _T] | FileType | None = None, + choices: Iterable[_T] | None = None, required: bool = False, help: str | None = None, - metavar: str | tuple[str, ...] | None = sentinel, + metavar: str | tuple[str, ...] | None = None, ) -> None: ... + def __call__( + self, parser: ArgumentParser, namespace: Namespace, values: str | Sequence[Any] | None, option_string: str | None = None + ) -> None: ... + if sys.version_info >= (3, 9): + def format_usage(self) -> str: ... + +if sys.version_info >= (3, 12): + class BooleanOptionalAction(Action): + if sys.version_info >= (3, 13): + @overload + def __init__( + self, + option_strings: Sequence[str], + dest: str, + default: bool | None = None, + *, + required: bool = False, + help: str | None = None, + deprecated: bool = False, + ) -> None: ... 
+ @overload + @deprecated("The `type`, `choices`, and `metavar` parameters are ignored and will be removed in Python 3.14.") + def __init__( + self, + option_strings: Sequence[str], + dest: str, + default: _T | bool | None = None, + type: Callable[[str], _T] | FileType | None = sentinel, + choices: Iterable[_T] | None = sentinel, + required: bool = False, + help: str | None = None, + metavar: str | tuple[str, ...] | None = sentinel, + deprecated: bool = False, + ) -> None: ... + else: + @overload + def __init__( + self, + option_strings: Sequence[str], + dest: str, + default: bool | None = None, + *, + required: bool = False, + help: str | None = None, + ) -> None: ... + @overload + @deprecated("The `type`, `choices`, and `metavar` parameters are ignored and will be removed in Python 3.14.") + def __init__( + self, + option_strings: Sequence[str], + dest: str, + default: _T | bool | None = None, + type: Callable[[str], _T] | FileType | None = sentinel, + choices: Iterable[_T] | None = sentinel, + required: bool = False, + help: str | None = None, + metavar: str | tuple[str, ...] | None = sentinel, + ) -> None: ... + elif sys.version_info >= (3, 9): class BooleanOptionalAction(Action): @overload @@ -431,7 +475,19 @@ class _StoreAction(Action): ... # undocumented class _StoreConstAction(Action): - if sys.version_info >= (3, 11): + if sys.version_info >= (3, 13): + def __init__( + self, + option_strings: Sequence[str], + dest: str, + const: Any | None = None, + default: Any = None, + required: bool = False, + help: str | None = None, + metavar: str | tuple[str, ...] | None = None, + deprecated: bool = False, + ) -> None: ... + elif sys.version_info >= (3, 11): def __init__( self, option_strings: Sequence[str], @@ -456,15 +512,37 @@ class _StoreConstAction(Action): # undocumented class _StoreTrueAction(_StoreConstAction): - def __init__( - self, option_strings: Sequence[str], dest: str, default: bool = False, required: bool = False, help: str | None = None - ) -> None: ... + if sys.version_info >= (3, 13): + def __init__( + self, + option_strings: Sequence[str], + dest: str, + default: bool = False, + required: bool = False, + help: str | None = None, + deprecated: bool = False, + ) -> None: ... + else: + def __init__( + self, option_strings: Sequence[str], dest: str, default: bool = False, required: bool = False, help: str | None = None + ) -> None: ... # undocumented class _StoreFalseAction(_StoreConstAction): - def __init__( - self, option_strings: Sequence[str], dest: str, default: bool = True, required: bool = False, help: str | None = None - ) -> None: ... + if sys.version_info >= (3, 13): + def __init__( + self, + option_strings: Sequence[str], + dest: str, + default: bool = True, + required: bool = False, + help: str | None = None, + deprecated: bool = False, + ) -> None: ... + else: + def __init__( + self, option_strings: Sequence[str], dest: str, default: bool = True, required: bool = False, help: str | None = None + ) -> None: ... # undocumented class _AppendAction(Action): ... @@ -474,7 +552,19 @@ class _ExtendAction(_AppendAction): ... # undocumented class _AppendConstAction(Action): - if sys.version_info >= (3, 11): + if sys.version_info >= (3, 13): + def __init__( + self, + option_strings: Sequence[str], + dest: str, + const: Any | None = None, + default: Any = None, + required: bool = False, + help: str | None = None, + metavar: str | tuple[str, ...] | None = None, + deprecated: bool = False, + ) -> None: ... 
+ elif sys.version_info >= (3, 11): def __init__( self, option_strings: Sequence[str], @@ -499,27 +589,72 @@ class _AppendConstAction(Action): # undocumented class _CountAction(Action): - def __init__( - self, option_strings: Sequence[str], dest: str, default: Any = None, required: bool = False, help: str | None = None - ) -> None: ... + if sys.version_info >= (3, 13): + def __init__( + self, + option_strings: Sequence[str], + dest: str, + default: Any = None, + required: bool = False, + help: str | None = None, + deprecated: bool = False, + ) -> None: ... + else: + def __init__( + self, option_strings: Sequence[str], dest: str, default: Any = None, required: bool = False, help: str | None = None + ) -> None: ... # undocumented class _HelpAction(Action): - def __init__( - self, option_strings: Sequence[str], dest: str = "==SUPPRESS==", default: str = "==SUPPRESS==", help: str | None = None - ) -> None: ... + if sys.version_info >= (3, 13): + def __init__( + self, + option_strings: Sequence[str], + dest: str = "==SUPPRESS==", + default: str = "==SUPPRESS==", + help: str | None = None, + deprecated: bool = False, + ) -> None: ... + else: + def __init__( + self, + option_strings: Sequence[str], + dest: str = "==SUPPRESS==", + default: str = "==SUPPRESS==", + help: str | None = None, + ) -> None: ... # undocumented class _VersionAction(Action): version: str | None - def __init__( - self, - option_strings: Sequence[str], - version: str | None = None, - dest: str = "==SUPPRESS==", - default: str = "==SUPPRESS==", - help: str = "show program's version number and exit", - ) -> None: ... + if sys.version_info >= (3, 13): + def __init__( + self, + option_strings: Sequence[str], + version: str | None = None, + dest: str = "==SUPPRESS==", + default: str = "==SUPPRESS==", + help: str | None = None, + deprecated: bool = False, + ) -> None: ... + elif sys.version_info >= (3, 11): + def __init__( + self, + option_strings: Sequence[str], + version: str | None = None, + dest: str = "==SUPPRESS==", + default: str = "==SUPPRESS==", + help: str | None = None, + ) -> None: ... + else: + def __init__( + self, + option_strings: Sequence[str], + version: str | None = None, + dest: str = "==SUPPRESS==", + default: str = "==SUPPRESS==", + help: str = "show program's version number and exit", + ) -> None: ... # undocumented class _SubParsersAction(Action, Generic[_ArgumentParserT]): @@ -542,7 +677,30 @@ class _SubParsersAction(Action, Generic[_ArgumentParserT]): # Note: `add_parser` accepts all kwargs of `ArgumentParser.__init__`. It also # accepts its own `help` and `aliases` kwargs. - if sys.version_info >= (3, 9): + if sys.version_info >= (3, 13): + def add_parser( + self, + name: str, + *, + deprecated: bool = False, + help: str | None = ..., + aliases: Sequence[str] = ..., + # Kwargs from ArgumentParser constructor + prog: str | None = ..., + usage: str | None = ..., + description: str | None = ..., + epilog: str | None = ..., + parents: Sequence[_ArgumentParserT] = ..., + formatter_class: _FormatterClass = ..., + prefix_chars: str = ..., + fromfile_prefix_chars: str | None = ..., + argument_default: Any = ..., + conflict_handler: str = ..., + add_help: bool = ..., + allow_abbrev: bool = ..., + exit_on_error: bool = ..., + ) -> _ArgumentParserT: ... 
+ elif sys.version_info >= (3, 9): def add_parser( self, name: str, diff --git a/crates/red_knot/vendor/typeshed/stdlib/array.pyi b/crates/red_knot/vendor/typeshed/stdlib/array.pyi index 1b7de1c7882d9..878d8d8cb808b 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/array.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/array.pyi @@ -87,6 +87,6 @@ class array(MutableSequence[_T]): def __buffer__(self, flags: int, /) -> memoryview: ... def __release_buffer__(self, buffer: memoryview, /) -> None: ... if sys.version_info >= (3, 12): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... ArrayType = array diff --git a/crates/red_knot/vendor/typeshed/stdlib/asyncio/__init__.pyi b/crates/red_knot/vendor/typeshed/stdlib/asyncio/__init__.pyi index d5bbe8cb06428..daf28862aa6a9 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/asyncio/__init__.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/asyncio/__init__.pyi @@ -30,12 +30,12 @@ if sys.platform == "win32": else: from .unix_events import * -_T = TypeVar("_T") +_T_co = TypeVar("_T_co", covariant=True) # Aliases imported by multiple submodules in typeshed if sys.version_info >= (3, 12): - _AwaitableLike: TypeAlias = Awaitable[_T] # noqa: Y047 - _CoroutineLike: TypeAlias = Coroutine[Any, Any, _T] # noqa: Y047 + _AwaitableLike: TypeAlias = Awaitable[_T_co] # noqa: Y047 + _CoroutineLike: TypeAlias = Coroutine[Any, Any, _T_co] # noqa: Y047 else: - _AwaitableLike: TypeAlias = Generator[Any, None, _T] | Awaitable[_T] - _CoroutineLike: TypeAlias = Generator[Any, None, _T] | Coroutine[Any, Any, _T] + _AwaitableLike: TypeAlias = Generator[Any, None, _T_co] | Awaitable[_T_co] + _CoroutineLike: TypeAlias = Generator[Any, None, _T_co] | Coroutine[Any, Any, _T_co] diff --git a/crates/red_knot/vendor/typeshed/stdlib/asyncio/events.pyi b/crates/red_knot/vendor/typeshed/stdlib/asyncio/events.pyi index 95de28c5021e4..c0345eb1b5b54 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/asyncio/events.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/asyncio/events.pyi @@ -2,7 +2,7 @@ import ssl import sys from _typeshed import FileDescriptorLike, ReadableBuffer, StrPath, Unused, WriteableBuffer from abc import ABCMeta, abstractmethod -from collections.abc import Callable, Coroutine, Generator, Sequence +from collections.abc import Callable, Sequence from contextvars import Context from socket import AddressFamily, SocketKind, _Address, _RetAddress, socket from typing import IO, Any, Literal, Protocol, TypeVar, overload @@ -43,7 +43,7 @@ _ProtocolFactory: TypeAlias = Callable[[], BaseProtocol] _SSLContext: TypeAlias = bool | None | ssl.SSLContext class _TaskFactory(Protocol): - def __call__(self, loop: AbstractEventLoop, factory: Coroutine[Any, Any, _T] | Generator[Any, None, _T], /) -> Future[_T]: ... + def __call__(self, loop: AbstractEventLoop, factory: _CoroutineLike[_T], /) -> Future[_T]: ... class Handle: _cancelled: bool diff --git a/crates/red_knot/vendor/typeshed/stdlib/asyncio/futures.pyi b/crates/red_knot/vendor/typeshed/stdlib/asyncio/futures.pyi index a3953cdaf8c7c..e19fd53f33111 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/asyncio/futures.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/asyncio/futures.pyi @@ -52,6 +52,6 @@ class Future(Awaitable[_T], Iterable[_T]): @property def _loop(self) -> AbstractEventLoop: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... 
def wrap_future(future: _ConcurrentFuture[_T] | Future[_T], *, loop: AbstractEventLoop | None = None) -> Future[_T]: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/asyncio/queues.pyi b/crates/red_knot/vendor/typeshed/stdlib/asyncio/queues.pyi index bb4ee71f9267b..1d8f80f4c3881 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/asyncio/queues.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/asyncio/queues.pyi @@ -41,7 +41,7 @@ class Queue(Generic[_T], _LoopBoundMixin): # noqa: Y059 async def join(self) -> None: ... def task_done(self) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, type: Any) -> GenericAlias: ... + def __class_getitem__(cls, type: Any, /) -> GenericAlias: ... class PriorityQueue(Queue[_T]): ... class LifoQueue(Queue[_T]): ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/asyncio/tasks.pyi b/crates/red_knot/vendor/typeshed/stdlib/asyncio/tasks.pyi index 67291071d512f..c16a1919b7c8f 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/asyncio/tasks.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/asyncio/tasks.pyi @@ -443,7 +443,7 @@ class Task(Future[_T_co]): # type: ignore[type-var] # pyright: ignore[reportIn @classmethod def all_tasks(cls, loop: AbstractEventLoop | None = None) -> set[Task[Any]]: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... def all_tasks(loop: AbstractEventLoop | None = None) -> set[Task[Any]]: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/atexit.pyi b/crates/red_knot/vendor/typeshed/stdlib/atexit.pyi index ea041d7b5e466..7f7b05ccc0a39 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/atexit.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/atexit.pyi @@ -8,5 +8,5 @@ _P = ParamSpec("_P") def _clear() -> None: ... def _ncallbacks() -> int: ... def _run_exitfuncs() -> None: ... -def register(func: Callable[_P, _T], *args: _P.args, **kwargs: _P.kwargs) -> Callable[_P, _T]: ... -def unregister(func: Callable[..., object]) -> None: ... +def register(func: Callable[_P, _T], /, *args: _P.args, **kwargs: _P.kwargs) -> Callable[_P, _T]: ... +def unregister(func: Callable[..., object], /) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/base64.pyi b/crates/red_knot/vendor/typeshed/stdlib/base64.pyi index 4629c95d0949b..8be4cfe69de00 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/base64.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/base64.pyi @@ -25,6 +25,8 @@ __all__ = [ if sys.version_info >= (3, 10): __all__ += ["b32hexencode", "b32hexdecode"] +if sys.version_info >= (3, 13): + __all__ += ["z85decode", "z85encode"] def b64encode(s: ReadableBuffer, altchars: ReadableBuffer | None = None) -> bytes: ... def b64decode(s: str | ReadableBuffer, altchars: str | ReadableBuffer | None = None, validate: bool = False) -> bytes: ... @@ -57,3 +59,7 @@ def decodebytes(s: ReadableBuffer) -> bytes: ... if sys.version_info < (3, 9): def encodestring(s: ReadableBuffer) -> bytes: ... def decodestring(s: ReadableBuffer) -> bytes: ... + +if sys.version_info >= (3, 13): + def z85encode(s: ReadableBuffer) -> bytes: ... + def z85decode(s: str | ReadableBuffer) -> bytes: ... 
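The `base64` hunk above tracks CPython 3.13's new Z85 codec (`z85encode`/`z85decode`, the ZeroMQ-flavored variant of Base85) and gates the names behind a version check in `__all__`. A minimal sketch of the runtime behavior these stubs describe, assuming a 3.13 interpreter; the 8-byte input is the reference test vector from the ZeroMQ Z85 spec:

```python
import base64
import sys

# The stubs gate these names behind 3.13, so mirror that at runtime.
assert sys.version_info >= (3, 13), "z85encode/z85decode are new in Python 3.13"

raw = b"\x86\x4f\xd2\x6f\xb5\x59\xf7\x5b"  # ZeroMQ Z85 spec test vector
encoded = base64.z85encode(raw)            # b"HelloWorld"
assert base64.z85decode(encoded) == raw    # lossless round-trip
```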
diff --git a/crates/red_knot/vendor/typeshed/stdlib/builtins.pyi b/crates/red_knot/vendor/typeshed/stdlib/builtins.pyi index f2da50b25758b..53e00ec6a5a96 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/builtins.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/builtins.pyi @@ -461,7 +461,7 @@ class str(Sequence[str]): def format(self: LiteralString, *args: LiteralString, **kwargs: LiteralString) -> LiteralString: ... @overload def format(self, *args: object, **kwargs: object) -> str: ... - def format_map(self, map: _FormatMapMapping) -> str: ... + def format_map(self, mapping: _FormatMapMapping, /) -> str: ... def index(self, sub: str, start: SupportsIndex | None = ..., end: SupportsIndex | None = ..., /) -> int: ... def isalnum(self) -> bool: ... def isalpha(self) -> bool: ... @@ -495,10 +495,20 @@ class str(Sequence[str]): def partition(self: LiteralString, sep: LiteralString, /) -> tuple[LiteralString, LiteralString, LiteralString]: ... @overload def partition(self, sep: str, /) -> tuple[str, str, str]: ... # type: ignore[misc] - @overload - def replace(self: LiteralString, old: LiteralString, new: LiteralString, count: SupportsIndex = -1, /) -> LiteralString: ... - @overload - def replace(self, old: str, new: str, count: SupportsIndex = -1, /) -> str: ... # type: ignore[misc] + if sys.version_info >= (3, 13): + @overload + def replace( + self: LiteralString, old: LiteralString, new: LiteralString, /, count: SupportsIndex = -1 + ) -> LiteralString: ... + @overload + def replace(self, old: str, new: str, /, count: SupportsIndex = -1) -> str: ... # type: ignore[misc] + else: + @overload + def replace( + self: LiteralString, old: LiteralString, new: LiteralString, count: SupportsIndex = -1, / + ) -> LiteralString: ... + @overload + def replace(self, old: str, new: str, count: SupportsIndex = -1, /) -> str: ... # type: ignore[misc] if sys.version_info >= (3, 9): @overload def removeprefix(self: LiteralString, prefix: LiteralString, /) -> LiteralString: ... @@ -1214,6 +1224,9 @@ class property: fset: Callable[[Any, Any], None] | None fdel: Callable[[Any], None] | None __isabstractmethod__: bool + if sys.version_info >= (3, 13): + __name__: str + def __init__( self, fget: Callable[[Any], Any] | None = ..., @@ -1321,12 +1334,34 @@ def divmod(x: _T_contra, y: SupportsRDivMod[_T_contra, _T_co], /) -> _T_co: ... # The `globals` argument to `eval` has to be `dict[str, Any]` rather than `dict[str, object]` due to invariance. # (The `globals` argument has to be a "real dict", rather than any old mapping, unlike the `locals` argument.) -def eval( - source: str | ReadableBuffer | CodeType, globals: dict[str, Any] | None = None, locals: Mapping[str, object] | None = None, / -) -> Any: ... +if sys.version_info >= (3, 13): + def eval( + source: str | ReadableBuffer | CodeType, + /, + globals: dict[str, Any] | None = None, + locals: Mapping[str, object] | None = None, + ) -> Any: ... + +else: + def eval( + source: str | ReadableBuffer | CodeType, + globals: dict[str, Any] | None = None, + locals: Mapping[str, object] | None = None, + /, + ) -> Any: ... # Comment above regarding `eval` applies to `exec` as well -if sys.version_info >= (3, 11): +if sys.version_info >= (3, 13): + def exec( + source: str | ReadableBuffer | CodeType, + /, + globals: dict[str, Any] | None = None, + locals: Mapping[str, object] | None = None, + *, + closure: tuple[CellType, ...] | None = None, + ) -> None: ... 
+ +elif sys.version_info >= (3, 11): def exec( source: str | ReadableBuffer | CodeType, globals: dict[str, Any] | None = None, @@ -2035,3 +2070,7 @@ if sys.version_info >= (3, 11): def split( self, condition: Callable[[_ExceptionT_co | Self], bool], / ) -> tuple[ExceptionGroup[_ExceptionT_co] | None, ExceptionGroup[_ExceptionT_co] | None]: ... + +if sys.version_info >= (3, 13): + class IncompleteInputError(SyntaxError): ... + class PythonFinalizationError(RuntimeError): ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/calendar.pyi b/crates/red_knot/vendor/typeshed/stdlib/calendar.pyi index 5cc49e102fdf5..39312d0b25238 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/calendar.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/calendar.pyi @@ -4,7 +4,7 @@ import sys from _typeshed import Unused from collections.abc import Iterable, Sequence from time import struct_time -from typing import ClassVar, Literal +from typing import ClassVar, Final from typing_extensions import TypeAlias __all__ = [ @@ -154,18 +154,18 @@ month_abbr: Sequence[str] if sys.version_info >= (3, 12): class Month(enum.IntEnum): - JANUARY: Literal[1] - FEBRUARY: Literal[2] - MARCH: Literal[3] - APRIL: Literal[4] - MAY: Literal[5] - JUNE: Literal[6] - JULY: Literal[7] - AUGUST: Literal[8] - SEPTEMBER: Literal[9] - OCTOBER: Literal[10] - NOVEMBER: Literal[11] - DECEMBER: Literal[12] + JANUARY = 1 + FEBRUARY = 2 + MARCH = 3 + APRIL = 4 + MAY = 5 + JUNE = 6 + JULY = 7 + AUGUST = 8 + SEPTEMBER = 9 + OCTOBER = 10 + NOVEMBER = 11 + DECEMBER = 12 JANUARY = Month.JANUARY FEBRUARY = Month.FEBRUARY @@ -181,13 +181,13 @@ if sys.version_info >= (3, 12): DECEMBER = Month.DECEMBER class Day(enum.IntEnum): - MONDAY: Literal[0] - TUESDAY: Literal[1] - WEDNESDAY: Literal[2] - THURSDAY: Literal[3] - FRIDAY: Literal[4] - SATURDAY: Literal[5] - SUNDAY: Literal[6] + MONDAY = 0 + TUESDAY = 1 + WEDNESDAY = 2 + THURSDAY = 3 + FRIDAY = 4 + SATURDAY = 5 + SUNDAY = 6 MONDAY = Day.MONDAY TUESDAY = Day.TUESDAY @@ -197,12 +197,12 @@ if sys.version_info >= (3, 12): SATURDAY = Day.SATURDAY SUNDAY = Day.SUNDAY else: - MONDAY: Literal[0] - TUESDAY: Literal[1] - WEDNESDAY: Literal[2] - THURSDAY: Literal[3] - FRIDAY: Literal[4] - SATURDAY: Literal[5] - SUNDAY: Literal[6] - -EPOCH: Literal[1970] + MONDAY: Final = 0 + TUESDAY: Final = 1 + WEDNESDAY: Final = 2 + THURSDAY: Final = 3 + FRIDAY: Final = 4 + SATURDAY: Final = 5 + SUNDAY: Final = 6 + +EPOCH: Final = 1970 diff --git a/crates/red_knot/vendor/typeshed/stdlib/code.pyi b/crates/red_knot/vendor/typeshed/stdlib/code.pyi index 4715bd866ddce..02689238a9a51 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/code.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/code.pyi @@ -1,3 +1,4 @@ +import sys from codeop import CommandCompiler from collections.abc import Callable, Mapping from types import CodeType @@ -18,16 +19,34 @@ class InteractiveInterpreter: class InteractiveConsole(InteractiveInterpreter): buffer: list[str] # undocumented filename: str # undocumented - def __init__(self, locals: Mapping[str, Any] | None = None, filename: str = "") -> None: ... + if sys.version_info >= (3, 13): + def __init__( + self, locals: Mapping[str, Any] | None = None, filename: str = "", *, local_exit: bool = False + ) -> None: ... + def push(self, line: str, filename: str | None = None) -> bool: ... + else: + def __init__(self, locals: Mapping[str, Any] | None = None, filename: str = "") -> None: ... + def push(self, line: str) -> bool: ... 
+ def interact(self, banner: str | None = None, exitmsg: str | None = None) -> None: ... - def push(self, line: str) -> bool: ... def resetbuffer(self) -> None: ... def raw_input(self, prompt: str = "") -> str: ... -def interact( - banner: str | None = None, - readfunc: Callable[[str], str] | None = None, - local: Mapping[str, Any] | None = None, - exitmsg: str | None = None, -) -> None: ... +if sys.version_info >= (3, 13): + def interact( + banner: str | None = None, + readfunc: Callable[[str], str] | None = None, + local: Mapping[str, Any] | None = None, + exitmsg: str | None = None, + local_exit: bool = False, + ) -> None: ... + +else: + def interact( + banner: str | None = None, + readfunc: Callable[[str], str] | None = None, + local: Mapping[str, Any] | None = None, + exitmsg: str | None = None, + ) -> None: ... + def compile_command(source: str, filename: str = "", symbol: str = "single") -> CodeType | None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/concurrent/futures/_base.pyi b/crates/red_knot/vendor/typeshed/stdlib/concurrent/futures/_base.pyi index 7dfdda2240131..3d5eccfc048dc 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/concurrent/futures/_base.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/concurrent/futures/_base.pyi @@ -54,7 +54,7 @@ class Future(Generic[_T]): def exception(self, timeout: float | None = None) -> BaseException | None: ... def set_exception(self, exception: BaseException | None) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class Executor: if sys.version_info >= (3, 9): diff --git a/crates/red_knot/vendor/typeshed/stdlib/concurrent/futures/thread.pyi b/crates/red_knot/vendor/typeshed/stdlib/concurrent/futures/thread.pyi index f38cf2c57963d..d1b7858eae026 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/concurrent/futures/thread.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/concurrent/futures/thread.pyi @@ -29,7 +29,7 @@ class _WorkItem(Generic[_S]): def __init__(self, future: Future[_S], fn: Callable[..., _S], args: Iterable[Any], kwargs: Mapping[str, Any]) -> None: ... def run(self) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... def _worker( executor_reference: ref[Any], diff --git a/crates/red_knot/vendor/typeshed/stdlib/contextvars.pyi b/crates/red_knot/vendor/typeshed/stdlib/contextvars.pyi index ceb9085fa1879..dd5ea0acbe2c0 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/contextvars.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/contextvars.pyi @@ -30,7 +30,7 @@ class ContextVar(Generic[_T]): def set(self, value: _T, /) -> Token[_T]: ... def reset(self, token: Token[_T], /) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... @final class Token(Generic[_T]): @@ -40,7 +40,7 @@ class Token(Generic[_T]): def old_value(self) -> Any: ... # returns either _T or MISSING, but that's hard to express MISSING: ClassVar[object] if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... def copy_context() -> Context: ... 
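One pattern repeats across the `array`, `asyncio`, `concurrent.futures`, and `contextvars` hunks above: `__class_getitem__(cls, item: Any)` gains a trailing `/`, declaring `item` positional-only. That matches the C-level implementation, which takes no keyword arguments; a sketch of the call-site difference the marker encodes (the keyword rejection is assumed from that implementation detail):

```python
from contextvars import ContextVar

ContextVar[int]                    # the normal spelling; the item is passed positionally
ContextVar.__class_getitem__(int)  # the same call, written out explicitly

try:
    ContextVar.__class_getitem__(item=int)  # keyword form: rejected at runtime
except TypeError as exc:
    print(f"rejected, as the trailing `/` in the stub predicts: {exc}")
```

Before this change, a checker would have accepted the keyword form even though it fails at runtime.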
diff --git a/crates/red_knot/vendor/typeshed/stdlib/csv.pyi b/crates/red_knot/vendor/typeshed/stdlib/csv.pyi index 56f8bf029b129..24f0db3321653 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/csv.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/csv.pyi @@ -40,7 +40,6 @@ __all__ = [ "QUOTE_NONE", "Error", "Dialect", - "__doc__", "excel", "excel_tab", "field_size_limit", @@ -51,13 +50,14 @@ __all__ = [ "list_dialects", "Sniffer", "unregister_dialect", - "__version__", "DictReader", "DictWriter", "unix_dialect", ] if sys.version_info >= (3, 12): __all__ += ["QUOTE_STRINGS", "QUOTE_NOTNULL"] +if sys.version_info < (3, 13): + __all__ += ["__doc__", "__version__"] _T = TypeVar("_T") @@ -111,7 +111,7 @@ class DictReader(Iterator[dict[_T | Any, str | Any]], Generic[_T]): def __iter__(self) -> Self: ... def __next__(self) -> dict[_T | Any, str | Any]: ... if sys.version_info >= (3, 12): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class DictWriter(Generic[_T]): fieldnames: Collection[_T] @@ -139,7 +139,7 @@ class DictWriter(Generic[_T]): def writerow(self, rowdict: Mapping[_T, Any]) -> Any: ... def writerows(self, rowdicts: Iterable[Mapping[_T, Any]]) -> None: ... if sys.version_info >= (3, 12): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class Sniffer: preferred: list[str] diff --git a/crates/red_knot/vendor/typeshed/stdlib/ctypes/__init__.pyi b/crates/red_knot/vendor/typeshed/stdlib/ctypes/__init__.pyi index 2fe551fa9dc20..dfd61c8f8ffca 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/ctypes/__init__.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/ctypes/__init__.pyi @@ -76,7 +76,7 @@ class LibraryLoader(Generic[_DLLT]): def __getitem__(self, name: str) -> _DLLT: ... def LoadLibrary(self, name: str) -> _DLLT: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... cdll: LibraryLoader[CDLL] if sys.platform == "win32": diff --git a/crates/red_knot/vendor/typeshed/stdlib/dataclasses.pyi b/crates/red_knot/vendor/typeshed/stdlib/dataclasses.pyi index c361122704a58..30489e6f8b3da 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/dataclasses.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/dataclasses.pyi @@ -5,7 +5,7 @@ from _typeshed import DataclassInstance from builtins import type as Type # alias to avoid name clashes with fields named "type" from collections.abc import Callable, Iterable, Mapping from typing import Any, Generic, Literal, Protocol, TypeVar, overload -from typing_extensions import TypeAlias, TypeGuard +from typing_extensions import TypeAlias, TypeIs if sys.version_info >= (3, 9): from types import GenericAlias @@ -143,7 +143,7 @@ class Field(Generic[_T]): def __set_name__(self, owner: Type[Any], name: str) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... # NOTE: Actual return type is 'Field[_T]', but we want to help type checkers # to understand the magic that happens at runtime. @@ -214,11 +214,9 @@ else: def fields(class_or_instance: DataclassInstance | type[DataclassInstance]) -> tuple[Field[Any], ...]: ... @overload -def is_dataclass(obj: DataclassInstance) -> Literal[True]: ... +def is_dataclass(obj: type) -> TypeIs[type[DataclassInstance]]: ... 
@overload -def is_dataclass(obj: type) -> TypeGuard[type[DataclassInstance]]: ... -@overload -def is_dataclass(obj: object) -> TypeGuard[DataclassInstance | type[DataclassInstance]]: ... +def is_dataclass(obj: object) -> TypeIs[DataclassInstance | type[DataclassInstance]]: ... class FrozenInstanceError(AttributeError): ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/datetime.pyi b/crates/red_knot/vendor/typeshed/stdlib/datetime.pyi index 7b890ca010dc5..71522a59d4df8 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/datetime.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/datetime.pyi @@ -79,6 +79,9 @@ class date: def isoformat(self) -> str: ... def timetuple(self) -> struct_time: ... def toordinal(self) -> int: ... + if sys.version_info >= (3, 13): + def __replace__(self, /, *, year: SupportsIndex = ..., month: SupportsIndex = ..., day: SupportsIndex = ...) -> Self: ... + def replace(self, year: SupportsIndex = ..., month: SupportsIndex = ..., day: SupportsIndex = ...) -> Self: ... def __le__(self, value: date, /) -> bool: ... def __lt__(self, value: date, /) -> bool: ... @@ -148,6 +151,19 @@ class time: def utcoffset(self) -> timedelta | None: ... def tzname(self) -> str | None: ... def dst(self) -> timedelta | None: ... + if sys.version_info >= (3, 13): + def __replace__( + self, + /, + *, + hour: SupportsIndex = ..., + minute: SupportsIndex = ..., + second: SupportsIndex = ..., + microsecond: SupportsIndex = ..., + tzinfo: _TzInfo | None = ..., + fold: int = ..., + ) -> Self: ... + def replace( self, hour: SupportsIndex = ..., @@ -263,6 +279,22 @@ class datetime(date): def date(self) -> _Date: ... def time(self) -> _Time: ... def timetz(self) -> _Time: ... + if sys.version_info >= (3, 13): + def __replace__( + self, + /, + *, + year: SupportsIndex = ..., + month: SupportsIndex = ..., + day: SupportsIndex = ..., + hour: SupportsIndex = ..., + minute: SupportsIndex = ..., + second: SupportsIndex = ..., + microsecond: SupportsIndex = ..., + tzinfo: _TzInfo | None = ..., + fold: int = ..., + ) -> Self: ... + def replace( self, year: SupportsIndex = ..., diff --git a/crates/red_knot/vendor/typeshed/stdlib/difflib.pyi b/crates/red_knot/vendor/typeshed/stdlib/difflib.pyi index d5b77b8f0e2c8..50154d785c2f5 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/difflib.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/difflib.pyi @@ -55,7 +55,7 @@ class SequenceMatcher(Generic[_T]): def quick_ratio(self) -> float: ... def real_quick_ratio(self) -> float: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... @overload def get_close_matches(word: AnyStr, possibilities: Iterable[AnyStr], n: int = 3, cutoff: float = 0.6) -> list[AnyStr]: ... 
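In the `dataclasses` hunk above, the `is_dataclass` overloads move from `TypeGuard` to `TypeIs` and drop the `Literal[True]` overload. The practical win is that `TypeIs` narrows in both branches of an `if`, where `TypeGuard` only narrowed the positive one. A sketch of what a checker can now prove (`Point` and `describe` are hypothetical names, used only for illustration):

```python
from dataclasses import dataclass, is_dataclass

@dataclass
class Point:
    x: int
    y: int

def describe(obj: Point | int) -> str:
    if is_dataclass(obj):
        # Positive branch: obj narrows to Point (as it already did under TypeGuard).
        return f"dataclass with fields {sorted(obj.__dataclass_fields__)}"
    # Negative branch: new with TypeIs, obj narrows to int,
    # so the arithmetic below type-checks without a cast.
    return f"plain int: {obj + 1}"

print(describe(Point(1, 2)))  # dataclass with fields ['x', 'y']
print(describe(41))           # plain int: 42
```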
diff --git a/crates/red_knot/vendor/typeshed/stdlib/dis.pyi b/crates/red_knot/vendor/typeshed/stdlib/dis.pyi index 796d81d8bf701..47c63cc8b3d3d 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/dis.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/dis.pyi @@ -47,7 +47,22 @@ if sys.version_info >= (3, 11): col_offset: int | None = None end_col_offset: int | None = None -if sys.version_info >= (3, 11): +if sys.version_info >= (3, 13): + class _Instruction(NamedTuple): + opname: str + opcode: int + arg: int | None + argval: Any + argrepr: str + offset: int + start_offset: int + starts_line: bool + line_number: int | None + label: int | None = None + positions: Positions | None = None + cache_info: list[tuple[str, int, Any]] | None = None + +elif sys.version_info >= (3, 11): class _Instruction(NamedTuple): opname: str opcode: int diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/archive_util.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/archive_util.pyi index a8947ce35c602..16684ff069568 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/archive_util.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/archive_util.pyi @@ -1,20 +1,35 @@ +from _typeshed import StrOrBytesPath, StrPath +from typing import Literal, overload + +@overload def make_archive( base_name: str, format: str, - root_dir: str | None = None, + root_dir: StrOrBytesPath | None = None, base_dir: str | None = None, - verbose: int = 0, - dry_run: int = 0, + verbose: bool | Literal[0, 1] = 0, + dry_run: bool | Literal[0, 1] = 0, + owner: str | None = None, + group: str | None = None, +) -> str: ... +@overload +def make_archive( + base_name: StrPath, + format: str, + root_dir: StrOrBytesPath, + base_dir: str | None = None, + verbose: bool | Literal[0, 1] = 0, + dry_run: bool | Literal[0, 1] = 0, owner: str | None = None, group: str | None = None, ) -> str: ... def make_tarball( base_name: str, - base_dir: str, + base_dir: StrPath, compress: str | None = "gzip", - verbose: int = 0, - dry_run: int = 0, + verbose: bool | Literal[0, 1] = 0, + dry_run: bool | Literal[0, 1] = 0, owner: str | None = None, group: str | None = None, ) -> str: ... -def make_zipfile(base_name: str, base_dir: str, verbose: int = 0, dry_run: int = 0) -> str: ... +def make_zipfile(base_name: str, base_dir: str, verbose: bool | Literal[0, 1] = 0, dry_run: bool | Literal[0, 1] = 0) -> str: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/ccompiler.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/ccompiler.pyi index cc097728f77c9..cd6efee0a2103 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/ccompiler.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/ccompiler.pyi @@ -1,5 +1,7 @@ -from collections.abc import Callable -from typing import Any +from _typeshed import BytesPath, StrPath +from collections.abc import Callable, Iterable +from distutils.file_util import _BytesPathT, _StrPathT +from typing import Any, Literal, overload from typing_extensions import TypeAlias _Macro: TypeAlias = tuple[str] | tuple[str, str | None] @@ -10,7 +12,11 @@ def gen_lib_options( def gen_preprocess_options(macros: list[_Macro], include_dirs: list[str]) -> list[str]: ... def get_default_compiler(osname: str | None = None, platform: str | None = None) -> str: ... 
def new_compiler( - plat: str | None = None, compiler: str | None = None, verbose: int = 0, dry_run: int = 0, force: int = 0 + plat: str | None = None, + compiler: str | None = None, + verbose: bool | Literal[0, 1] = 0, + dry_run: bool | Literal[0, 1] = 0, + force: bool | Literal[0, 1] = 0, ) -> CCompiler: ... def show_compilers() -> None: ... @@ -25,7 +31,9 @@ class CCompiler: library_dirs: list[str] runtime_library_dirs: list[str] objects: list[str] - def __init__(self, verbose: int = 0, dry_run: int = 0, force: int = 0) -> None: ... + def __init__( + self, verbose: bool | Literal[0, 1] = 0, dry_run: bool | Literal[0, 1] = 0, force: bool | Literal[0, 1] = 0 + ) -> None: ... def add_include_dir(self, dir: str) -> None: ... def set_include_dirs(self, dirs: list[str]) -> None: ... def add_library(self, libname: str) -> None: ... @@ -39,7 +47,7 @@ class CCompiler: def add_link_object(self, object: str) -> None: ... def set_link_objects(self, objects: list[str]) -> None: ... def detect_language(self, sources: str | list[str]) -> str | None: ... - def find_library_file(self, dirs: list[str], lib: str, debug: bool = ...) -> str | None: ... + def find_library_file(self, dirs: list[str], lib: str, debug: bool | Literal[0, 1] = 0) -> str | None: ... def has_function( self, funcname: str, @@ -58,7 +66,7 @@ class CCompiler: output_dir: str | None = None, macros: list[_Macro] | None = None, include_dirs: list[str] | None = None, - debug: bool = ..., + debug: bool | Literal[0, 1] = 0, extra_preargs: list[str] | None = None, extra_postargs: list[str] | None = None, depends: list[str] | None = None, @@ -68,7 +76,7 @@ class CCompiler: objects: list[str], output_libname: str, output_dir: str | None = None, - debug: bool = ..., + debug: bool | Literal[0, 1] = 0, target_lang: str | None = None, ) -> None: ... def link( @@ -81,7 +89,7 @@ class CCompiler: library_dirs: list[str] | None = None, runtime_library_dirs: list[str] | None = None, export_symbols: list[str] | None = None, - debug: bool = ..., + debug: bool | Literal[0, 1] = 0, extra_preargs: list[str] | None = None, extra_postargs: list[str] | None = None, build_temp: str | None = None, @@ -95,7 +103,7 @@ class CCompiler: libraries: list[str] | None = None, library_dirs: list[str] | None = None, runtime_library_dirs: list[str] | None = None, - debug: bool = ..., + debug: bool | Literal[0, 1] = 0, extra_preargs: list[str] | None = None, extra_postargs: list[str] | None = None, target_lang: str | None = None, @@ -109,7 +117,7 @@ class CCompiler: library_dirs: list[str] | None = None, runtime_library_dirs: list[str] | None = None, export_symbols: list[str] | None = None, - debug: bool = ..., + debug: bool | Literal[0, 1] = 0, extra_preargs: list[str] | None = None, extra_postargs: list[str] | None = None, build_temp: str | None = None, @@ -124,7 +132,7 @@ class CCompiler: library_dirs: list[str] | None = None, runtime_library_dirs: list[str] | None = None, export_symbols: list[str] | None = None, - debug: bool = ..., + debug: bool | Literal[0, 1] = 0, extra_preargs: list[str] | None = None, extra_postargs: list[str] | None = None, build_temp: str | None = None, @@ -139,14 +147,27 @@ class CCompiler: extra_preargs: list[str] | None = None, extra_postargs: list[str] | None = None, ) -> None: ... - def executable_filename(self, basename: str, strip_dir: int = 0, output_dir: str = "") -> str: ... - def library_filename(self, libname: str, lib_type: str = "static", strip_dir: int = 0, output_dir: str = "") -> str: ... 
- def object_filenames(self, source_filenames: list[str], strip_dir: int = 0, output_dir: str = "") -> list[str]: ... - def shared_object_filename(self, basename: str, strip_dir: int = 0, output_dir: str = "") -> str: ... + @overload + def executable_filename(self, basename: str, strip_dir: Literal[0, False] = 0, output_dir: StrPath = "") -> str: ... + @overload + def executable_filename(self, basename: StrPath, strip_dir: Literal[1, True], output_dir: StrPath = "") -> str: ... + def library_filename( + self, libname: str, lib_type: str = "static", strip_dir: bool | Literal[0, 1] = 0, output_dir: StrPath = "" + ) -> str: ... + def object_filenames( + self, source_filenames: Iterable[StrPath], strip_dir: bool | Literal[0, 1] = 0, output_dir: StrPath | None = "" + ) -> list[str]: ... + @overload + def shared_object_filename(self, basename: str, strip_dir: Literal[0, False] = 0, output_dir: StrPath = "") -> str: ... + @overload + def shared_object_filename(self, basename: StrPath, strip_dir: Literal[1, True], output_dir: StrPath = "") -> str: ... def execute(self, func: Callable[..., object], args: tuple[Any, ...], msg: str | None = None, level: int = 1) -> None: ... def spawn(self, cmd: list[str]) -> None: ... def mkpath(self, name: str, mode: int = 0o777) -> None: ... - def move_file(self, src: str, dst: str) -> str: ... + @overload + def move_file(self, src: StrPath, dst: _StrPathT) -> _StrPathT | str: ... + @overload + def move_file(self, src: BytesPath, dst: _BytesPathT) -> _BytesPathT | bytes: ... def announce(self, msg: str, level: int = 1) -> None: ... def warn(self, msg: str) -> None: ... def debug_print(self, msg: str) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/cmd.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/cmd.pyi index 61fce37b80bcd..defea50e78dc2 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/cmd.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/cmd.pyi @@ -1,12 +1,14 @@ -from _typeshed import Incomplete +from _typeshed import BytesPath, Incomplete, StrOrBytesPath, StrPath, Unused from abc import abstractmethod from collections.abc import Callable, Iterable from distutils.dist import Distribution -from typing import Any +from distutils.file_util import _BytesPathT, _StrPathT +from typing import Any, ClassVar, Literal, overload class Command: distribution: Distribution - sub_commands: list[tuple[str, Callable[[Command], bool] | None]] + # Any to work around variance issues + sub_commands: ClassVar[list[tuple[str, Callable[[Any], bool] | None]]] def __init__(self, dist: Distribution) -> None: ... @abstractmethod def initialize_options(self) -> None: ... @@ -22,32 +24,63 @@ class Command: def ensure_dirname(self, option: str) -> None: ... def get_command_name(self) -> str: ... def set_undefined_options(self, src_cmd: str, *option_pairs: tuple[str, str]) -> None: ... - def get_finalized_command(self, command: str, create: int = 1) -> Command: ... - def reinitialize_command(self, command: Command | str, reinit_subcommands: int = 0) -> Command: ... + def get_finalized_command(self, command: str, create: bool | Literal[0, 1] = 1) -> Command: ... + def reinitialize_command(self, command: Command | str, reinit_subcommands: bool | Literal[0, 1] = 0) -> Command: ... def run_command(self, command: str) -> None: ... def get_sub_commands(self) -> list[str]: ... def warn(self, msg: str) -> None: ... def execute(self, func: Callable[..., object], args: Iterable[Any], msg: str | None = None, level: int = 1) -> None: ... 
def mkpath(self, name: str, mode: int = 0o777) -> None: ... + @overload def copy_file( - self, infile: str, outfile: str, preserve_mode: int = 1, preserve_times: int = 1, link: str | None = None, level: Any = 1 - ) -> tuple[str, bool]: ... # level is not used + self, + infile: StrPath, + outfile: _StrPathT, + preserve_mode: bool | Literal[0, 1] = 1, + preserve_times: bool | Literal[0, 1] = 1, + link: str | None = None, + level: Unused = 1, + ) -> tuple[_StrPathT | str, bool]: ... + @overload + def copy_file( + self, + infile: BytesPath, + outfile: _BytesPathT, + preserve_mode: bool | Literal[0, 1] = 1, + preserve_times: bool | Literal[0, 1] = 1, + link: str | None = None, + level: Unused = 1, + ) -> tuple[_BytesPathT | bytes, bool]: ... def copy_tree( self, - infile: str, + infile: StrPath, outfile: str, - preserve_mode: int = 1, - preserve_times: int = 1, - preserve_symlinks: int = 0, - level: Any = 1, - ) -> list[str]: ... # level is not used - def move_file(self, src: str, dst: str, level: Any = 1) -> str: ... # level is not used - def spawn(self, cmd: Iterable[str], search_path: int = 1, level: Any = 1) -> None: ... # level is not used + preserve_mode: bool | Literal[0, 1] = 1, + preserve_times: bool | Literal[0, 1] = 1, + preserve_symlinks: bool | Literal[0, 1] = 0, + level: Unused = 1, + ) -> list[str]: ... + @overload + def move_file(self, src: StrPath, dst: _StrPathT, level: Unused = 1) -> _StrPathT | str: ... + @overload + def move_file(self, src: BytesPath, dst: _BytesPathT, level: Unused = 1) -> _BytesPathT | bytes: ... + def spawn(self, cmd: Iterable[str], search_path: bool | Literal[0, 1] = 1, level: Unused = 1) -> None: ... + @overload def make_archive( self, base_name: str, format: str, - root_dir: str | None = None, + root_dir: StrOrBytesPath | None = None, + base_dir: str | None = None, + owner: str | None = None, + group: str | None = None, + ) -> str: ... + @overload + def make_archive( + self, + base_name: StrPath, + format: str, + root_dir: StrOrBytesPath, base_dir: str | None = None, owner: str | None = None, group: str | None = None, @@ -55,12 +88,12 @@ class Command: def make_file( self, infiles: str | list[str] | tuple[str, ...], - outfile: str, + outfile: StrOrBytesPath, func: Callable[..., object], args: list[Any], exec_msg: str | None = None, skip_msg: str | None = None, - level: Any = 1, - ) -> None: ... # level is not used + level: Unused = 1, + ) -> None: ... def ensure_finalized(self) -> None: ... def dump_options(self, header: Incomplete | None = None, indent: str = "") -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/bdist_msi.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/bdist_msi.pyi index fa98e86d592a4..d1eb374ff52bd 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/bdist_msi.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/bdist_msi.pyi @@ -1,5 +1,5 @@ import sys -from typing import Any +from typing import Any, Literal from ..cmd import Command @@ -9,9 +9,9 @@ if sys.platform == "win32": class PyDialog(Dialog): def __init__(self, *args, **kw) -> None: ... def title(self, title) -> None: ... - def back(self, title, next, name: str = "Back", active: int = 1): ... - def cancel(self, title, next, name: str = "Cancel", active: int = 1): ... - def next(self, title, next, name: str = "Next", active: int = 1): ... + def back(self, title, next, name: str = "Back", active: bool | Literal[0, 1] = 1): ... 
+ def cancel(self, title, next, name: str = "Cancel", active: bool | Literal[0, 1] = 1): ... + def next(self, title, next, name: str = "Next", active: bool | Literal[0, 1] = 1): ... def xbutton(self, name, title, next, xpos): ... class bdist_msi(Command): diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/build.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/build.pyi index cf3c8a562ff39..31fc036d4f97e 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/build.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/build.pyi @@ -1,4 +1,5 @@ -from typing import Any +from collections.abc import Callable +from typing import Any, ClassVar from ..cmd import Command @@ -28,4 +29,5 @@ class build(Command): def has_c_libraries(self): ... def has_ext_modules(self): ... def has_scripts(self): ... - sub_commands: Any + # Any to work around variance issues + sub_commands: ClassVar[list[tuple[str, Callable[[Any], bool] | None]]] diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/build_py.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/build_py.pyi index ca4e4ed7e7972..4c607c6dabe90 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/build_py.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/build_py.pyi @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Literal from ..cmd import Command from ..util import Mixin2to3 as Mixin2to3 @@ -32,7 +32,7 @@ class build_py(Command): def find_all_modules(self): ... def get_source_files(self): ... def get_module_outfile(self, build_dir, package, module): ... - def get_outputs(self, include_bytecode: int = 1): ... + def get_outputs(self, include_bytecode: bool | Literal[0, 1] = 1): ... def build_module(self, module, module_file, package): ... def build_modules(self) -> None: ... def build_packages(self) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/check.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/check.pyi index 9cbcc6c87f21d..da041d82587de 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/check.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/check.pyi @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Literal from typing_extensions import TypeAlias from ..cmd import Command @@ -16,7 +16,7 @@ class SilentReporter(_Reporter): report_level, halt_level, stream: Any | None = ..., - debug: int = ..., + debug: bool | Literal[0, 1] = 0, encoding: str = ..., error_handler: str = ..., ) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/config.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/config.pyi index 7077c9a4c158a..391f5a8620383 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/config.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/config.pyi @@ -1,6 +1,7 @@ +from _typeshed import StrOrBytesPath from collections.abc import Sequence from re import Pattern -from typing import Any +from typing import Any, Literal from ..ccompiler import CCompiler from ..cmd import Command @@ -65,8 +66,8 @@ class config(Command): include_dirs: Sequence[str] | None = None, libraries: Sequence[str] | None = None, library_dirs: Sequence[str] | None = None, - decl: int = 0, - call: int = 0, + decl: bool | Literal[0, 1] = 0, + call: bool | Literal[0, 1] = 0, ) -> bool: ... 
def check_lib( self, @@ -80,4 +81,4 @@ class config(Command): self, header: str, include_dirs: Sequence[str] | None = None, library_dirs: Sequence[str] | None = None, lang: str = "c" ) -> bool: ... -def dump_file(filename: str, head: Any | None = None) -> None: ... +def dump_file(filename: StrOrBytesPath, head: Any | None = None) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/install.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/install.pyi index 661d256e6f078..8b2295d7a3c7e 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/install.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/install.pyi @@ -1,4 +1,5 @@ -from typing import Any +from collections.abc import Callable +from typing import Any, ClassVar from ..cmd import Command @@ -60,4 +61,5 @@ class install(Command): def has_headers(self): ... def has_scripts(self): ... def has_data(self): ... - sub_commands: Any + # Any to work around variance issues + sub_commands: ClassVar[list[tuple[str, Callable[[Any], bool] | None]]] diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/register.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/register.pyi index f88b94113ff42..a5e251d2d01e8 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/register.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/register.pyi @@ -1,10 +1,12 @@ -from typing import Any +from collections.abc import Callable +from typing import Any, ClassVar from ..config import PyPIRCCommand class register(PyPIRCCommand): description: str - sub_commands: Any + # Any to work around variance issues + sub_commands: ClassVar[list[tuple[str, Callable[[Any], bool] | None]]] list_classifiers: int strict: int def initialize_options(self) -> None: ... 
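Most of the `distutils` churn above (and in the hunks that follow) widens flag parameters from plain `int` or `bool` to `bool | Literal[0, 1]`. The runtime always treated these as truthy ints, so the union lets both the legacy `0`/`1` spelling and modern booleans type-check while still flagging arbitrary ints. A sketch against the `new_compiler` signature from the `ccompiler` hunk; this assumes an interpreter where `distutils` is importable (the stdlib copy on Python <= 3.11, or the copy setuptools injects):

```python
from distutils.ccompiler import new_compiler

cc_legacy = new_compiler(verbose=1, dry_run=0)         # historical int flags: accepted
cc_modern = new_compiler(verbose=True, dry_run=False)  # plain booleans: accepted too

# new_compiler(verbose=2)  # a checker now rejects this: 2 is not bool | Literal[0, 1]
```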
diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/sdist.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/sdist.pyi index 636c4a351d195..db303f77a4634 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/command/sdist.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/command/sdist.pyi @@ -1,4 +1,5 @@ -from typing import Any +from collections.abc import Callable +from typing import Any, ClassVar from ..cmd import Command @@ -11,7 +12,8 @@ class sdist(Command): boolean_options: Any help_options: Any negative_opt: Any - sub_commands: Any + # Any to work around variance issues + sub_commands: ClassVar[list[tuple[str, Callable[[Any], bool] | None]]] READMES: Any template: Any manifest: Any diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/core.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/core.pyi index c41c8ba19a8b2..f3c434df0b1a0 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/core.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/core.pyi @@ -3,7 +3,7 @@ from collections.abc import Mapping from distutils.cmd import Command as Command from distutils.dist import Distribution as Distribution from distutils.extension import Extension as Extension -from typing import Any +from typing import Any, Literal USAGE: str @@ -45,7 +45,7 @@ def setup( command_packages: list[str] = ..., command_options: Mapping[str, Mapping[str, tuple[Any, Any]]] = ..., package_data: Mapping[str, list[str]] = ..., - include_package_data: bool = ..., + include_package_data: bool | Literal[0, 1] = ..., libraries: list[str] = ..., headers: list[str] = ..., ext_package: str = ..., diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/dep_util.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/dep_util.pyi index 096ce19d4859f..058377accabcc 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/dep_util.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/dep_util.pyi @@ -1,3 +1,14 @@ -def newer(source: str, target: str) -> bool: ... -def newer_pairwise(sources: list[str], targets: list[str]) -> list[tuple[str, str]]: ... -def newer_group(sources: list[str], target: str, missing: str = "error") -> bool: ... +from _typeshed import StrOrBytesPath, SupportsLenAndGetItem +from collections.abc import Iterable +from typing import Literal, TypeVar + +_SourcesT = TypeVar("_SourcesT", bound=StrOrBytesPath) +_TargetsT = TypeVar("_TargetsT", bound=StrOrBytesPath) + +def newer(source: StrOrBytesPath, target: StrOrBytesPath) -> bool | Literal[1]: ... +def newer_pairwise( + sources: SupportsLenAndGetItem[_SourcesT], targets: SupportsLenAndGetItem[_TargetsT] +) -> tuple[list[_SourcesT], list[_TargetsT]]: ... +def newer_group( + sources: Iterable[StrOrBytesPath], target: StrOrBytesPath, missing: Literal["error", "ignore", "newer"] = "error" +) -> Literal[0, 1]: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/dir_util.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/dir_util.pyi index 2324a2d50caa4..23e2c3bc28b98 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/dir_util.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/dir_util.pyi @@ -1,13 +1,23 @@ -def mkpath(name: str, mode: int = 0o777, verbose: int = 1, dry_run: int = 0) -> list[str]: ... -def create_tree(base_dir: str, files: list[str], mode: int = 0o777, verbose: int = 1, dry_run: int = 0) -> None: ... 
+from _typeshed import StrOrBytesPath, StrPath +from collections.abc import Iterable +from typing import Literal + +def mkpath(name: str, mode: int = 0o777, verbose: bool | Literal[0, 1] = 1, dry_run: bool | Literal[0, 1] = 0) -> list[str]: ... +def create_tree( + base_dir: StrPath, + files: Iterable[StrPath], + mode: int = 0o777, + verbose: bool | Literal[0, 1] = 1, + dry_run: bool | Literal[0, 1] = 0, +) -> None: ... def copy_tree( - src: str, + src: StrPath, dst: str, - preserve_mode: int = 1, - preserve_times: int = 1, - preserve_symlinks: int = 0, - update: int = 0, - verbose: int = 1, - dry_run: int = 0, + preserve_mode: bool | Literal[0, 1] = 1, + preserve_times: bool | Literal[0, 1] = 1, + preserve_symlinks: bool | Literal[0, 1] = 0, + update: bool | Literal[0, 1] = 0, + verbose: bool | Literal[0, 1] = 1, + dry_run: bool | Literal[0, 1] = 0, ) -> list[str]: ... -def remove_tree(directory: str, verbose: int = 1, dry_run: int = 0) -> None: ... +def remove_tree(directory: StrOrBytesPath, verbose: bool | Literal[0, 1] = 1, dry_run: bool | Literal[0, 1] = 0) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/dist.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/dist.pyi index b296b11f73ba6..4094df9033250 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/dist.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/dist.pyi @@ -1,8 +1,8 @@ -from _typeshed import FileDescriptorOrPath, Incomplete, SupportsWrite +from _typeshed import Incomplete, StrOrBytesPath, StrPath, SupportsWrite from collections.abc import Iterable, Mapping from distutils.cmd import Command from re import Pattern -from typing import IO, Any, ClassVar, TypeVar, overload +from typing import IO, Any, ClassVar, Literal, TypeVar, overload from typing_extensions import TypeAlias command_re: Pattern[str] @@ -11,7 +11,7 @@ _OptionsList: TypeAlias = list[tuple[str, str | None, str, int] | tuple[str, str _CommandT = TypeVar("_CommandT", bound=Command) class DistributionMetadata: - def __init__(self, path: FileDescriptorOrPath | None = None) -> None: ... + def __init__(self, path: StrOrBytesPath | None = None) -> None: ... name: str | None version: str | None author: str | None @@ -30,7 +30,7 @@ class DistributionMetadata: requires: list[str] | None obsoletes: list[str] | None def read_pkg_file(self, file: IO[str]) -> None: ... - def write_pkg_info(self, base_dir: str) -> None: ... + def write_pkg_info(self, base_dir: StrPath) -> None: ... def write_pkg_file(self, file: SupportsWrite[str]) -> None: ... def get_name(self) -> str: ... def get_version(self) -> str: ... @@ -63,7 +63,10 @@ class Distribution: def __init__(self, attrs: Mapping[str, Any] | None = None) -> None: ... def get_option_dict(self, command: str) -> dict[str, tuple[str, str]]: ... def parse_config_files(self, filenames: Iterable[str] | None = None) -> None: ... - def get_command_obj(self, command: str, create: bool = True) -> Command | None: ... + @overload + def get_command_obj(self, command: str, create: Literal[1, True] = 1) -> Command: ... + @overload + def get_command_obj(self, command: str, create: Literal[0, False]) -> Command | None: ... 
global_options: ClassVar[_OptionsList] common_usage: ClassVar[str] display_options: ClassVar[_OptionsList] diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/file_util.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/file_util.pyi index a97dfca60007c..873d23ea7e500 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/file_util.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/file_util.pyi @@ -1,14 +1,38 @@ -from collections.abc import Sequence +from _typeshed import BytesPath, StrOrBytesPath, StrPath +from collections.abc import Iterable +from typing import Literal, TypeVar, overload +_StrPathT = TypeVar("_StrPathT", bound=StrPath) +_BytesPathT = TypeVar("_BytesPathT", bound=BytesPath) + +@overload +def copy_file( + src: StrPath, + dst: _StrPathT, + preserve_mode: bool | Literal[0, 1] = 1, + preserve_times: bool | Literal[0, 1] = 1, + update: bool | Literal[0, 1] = 0, + link: str | None = None, + verbose: bool | Literal[0, 1] = 1, + dry_run: bool | Literal[0, 1] = 0, +) -> tuple[_StrPathT | str, bool]: ... +@overload def copy_file( - src: str, - dst: str, - preserve_mode: bool = ..., - preserve_times: bool = ..., - update: bool = ..., + src: BytesPath, + dst: _BytesPathT, + preserve_mode: bool | Literal[0, 1] = 1, + preserve_times: bool | Literal[0, 1] = 1, + update: bool | Literal[0, 1] = 0, link: str | None = None, - verbose: bool = ..., - dry_run: bool = ..., -) -> tuple[str, str]: ... -def move_file(src: str, dst: str, verbose: bool = ..., dry_run: bool = ...) -> str: ... -def write_file(filename: str, contents: Sequence[str]) -> None: ... + verbose: bool | Literal[0, 1] = 1, + dry_run: bool | Literal[0, 1] = 0, +) -> tuple[_BytesPathT | bytes, bool]: ... +@overload +def move_file( + src: StrPath, dst: _StrPathT, verbose: bool | Literal[0, 1] = 0, dry_run: bool | Literal[0, 1] = 0 +) -> _StrPathT | str: ... +@overload +def move_file( + src: BytesPath, dst: _BytesPathT, verbose: bool | Literal[0, 1] = 0, dry_run: bool | Literal[0, 1] = 0 +) -> _BytesPathT | bytes: ... +def write_file(filename: StrOrBytesPath, contents: Iterable[str]) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/filelist.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/filelist.pyi index 25db2f3cb6cc8..607a78a1fbaca 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/filelist.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/filelist.pyi @@ -23,7 +23,11 @@ class FileList: def include_pattern(self, pattern: str | Pattern[str], *, is_regex: Literal[True, 1]) -> bool: ... @overload def include_pattern( - self, pattern: str | Pattern[str], anchor: bool | Literal[0, 1] = 1, prefix: str | None = None, is_regex: int = 0 + self, + pattern: str | Pattern[str], + anchor: bool | Literal[0, 1] = 1, + prefix: str | None = None, + is_regex: bool | Literal[0, 1] = 0, ) -> bool: ... @overload def exclude_pattern( @@ -33,7 +37,11 @@ class FileList: def exclude_pattern(self, pattern: str | Pattern[str], *, is_regex: Literal[True, 1]) -> bool: ... @overload def exclude_pattern( - self, pattern: str | Pattern[str], anchor: bool | Literal[0, 1] = 1, prefix: str | None = None, is_regex: int = 0 + self, + pattern: str | Pattern[str], + anchor: bool | Literal[0, 1] = 1, + prefix: str | None = None, + is_regex: bool | Literal[0, 1] = 0, ) -> bool: ... def findall(dir: str = ".") -> list[str]: ... @@ -46,5 +54,5 @@ def translate_pattern( def translate_pattern(pattern: str | Pattern[str], *, is_regex: Literal[True, 1]) -> Pattern[str]: ... 
@overload def translate_pattern( - pattern: str | Pattern[str], anchor: bool | Literal[0, 1] = 1, prefix: str | None = None, is_regex: int = 0 + pattern: str | Pattern[str], anchor: bool | Literal[0, 1] = 1, prefix: str | None = None, is_regex: bool | Literal[0, 1] = 0 ) -> Pattern[str]: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/spawn.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/spawn.pyi index a8a2c4140b2d0..50d89aeb9e5fd 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/spawn.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/spawn.pyi @@ -1,2 +1,6 @@ -def spawn(cmd: list[str], search_path: bool = ..., verbose: bool = ..., dry_run: bool = ...) -> None: ... +from typing import Literal + +def spawn( + cmd: list[str], search_path: bool | Literal[0, 1] = 1, verbose: bool | Literal[0, 1] = 0, dry_run: bool | Literal[0, 1] = 0 +) -> None: ... def find_executable(executable: str, path: str | None = None) -> str | None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/sysconfig.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/sysconfig.pyi index e2399a6cf36b9..da72e3275fe38 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/sysconfig.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/sysconfig.pyi @@ -23,8 +23,10 @@ def get_config_vars() -> dict[str, str | int]: ... def get_config_vars(arg: str, /, *args: str) -> list[str | int]: ... def get_config_h_filename() -> str: ... def get_makefile_filename() -> str: ... -def get_python_inc(plat_specific: bool = ..., prefix: str | None = None) -> str: ... -def get_python_lib(plat_specific: bool = ..., standard_lib: bool = ..., prefix: str | None = None) -> str: ... +def get_python_inc(plat_specific: bool | Literal[0, 1] = 0, prefix: str | None = None) -> str: ... +def get_python_lib( + plat_specific: bool | Literal[0, 1] = 0, standard_lib: bool | Literal[0, 1] = 0, prefix: str | None = None +) -> str: ... def customize_compiler(compiler: CCompiler) -> None: ... if sys.version_info < (3, 10): diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/text_file.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/text_file.pyi index 4a6cf1db77c6f..54951af7e55d6 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/text_file.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/text_file.pyi @@ -1,4 +1,4 @@ -from typing import IO +from typing import IO, Literal class TextFile: def __init__( @@ -6,12 +6,12 @@ class TextFile: filename: str | None = None, file: IO[str] | None = None, *, - strip_comments: bool = ..., - lstrip_ws: bool = ..., - rstrip_ws: bool = ..., - skip_blanks: bool = ..., - join_lines: bool = ..., - collapse_join: bool = ..., + strip_comments: bool | Literal[0, 1] = ..., + lstrip_ws: bool | Literal[0, 1] = ..., + rstrip_ws: bool | Literal[0, 1] = ..., + skip_blanks: bool | Literal[0, 1] = ..., + join_lines: bool | Literal[0, 1] = ..., + collapse_join: bool | Literal[0, 1] = ..., ) -> None: ... def open(self, filename: str) -> None: ... def close(self) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/distutils/util.pyi b/crates/red_knot/vendor/typeshed/stdlib/distutils/util.pyi index 835266edde596..515b5b2b86d9f 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/distutils/util.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/distutils/util.pyi @@ -5,22 +5,26 @@ from typing import Any, Literal def get_host_platform() -> str: ... def get_platform() -> str: ... def convert_path(pathname: str) -> str: ... 
-def change_root(new_root: str, pathname: str) -> str: ... +def change_root(new_root: StrPath, pathname: StrPath) -> str: ... def check_environ() -> None: ... def subst_vars(s: str, local_vars: Mapping[str, str]) -> None: ... def split_quoted(s: str) -> list[str]: ... def execute( - func: Callable[..., object], args: tuple[Any, ...], msg: str | None = None, verbose: bool = ..., dry_run: bool = ... + func: Callable[..., object], + args: tuple[Any, ...], + msg: str | None = None, + verbose: bool | Literal[0, 1] = 0, + dry_run: bool | Literal[0, 1] = 0, ) -> None: ... def strtobool(val: str) -> Literal[0, 1]: ... def byte_compile( py_files: list[str], optimize: int = 0, - force: bool = ..., + force: bool | Literal[0, 1] = 0, prefix: str | None = None, base_dir: str | None = None, - verbose: bool = ..., - dry_run: bool = ..., + verbose: bool | Literal[0, 1] = 1, + dry_run: bool | Literal[0, 1] = 0, direct: bool | None = None, ) -> None: ... def rfc822_escape(header: str) -> str: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/faulthandler.pyi b/crates/red_knot/vendor/typeshed/stdlib/faulthandler.pyi index 7b42b8ec84441..320a8b6fad150 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/faulthandler.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/faulthandler.pyi @@ -10,4 +10,4 @@ def is_enabled() -> bool: ... if sys.platform != "win32": def register(signum: int, file: FileDescriptorLike = ..., all_threads: bool = ..., chain: bool = ...) -> None: ... - def unregister(signum: int) -> None: ... + def unregister(signum: int, /) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/filecmp.pyi b/crates/red_knot/vendor/typeshed/stdlib/filecmp.pyi index 4f54a9bff6ee4..5c8232d800d5f 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/filecmp.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/filecmp.pyi @@ -52,6 +52,6 @@ class dircmp(Generic[AnyStr]): def phase4(self) -> None: ... def phase4_closure(self) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... def clear_cache() -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/fileinput.pyi b/crates/red_knot/vendor/typeshed/stdlib/fileinput.pyi index e8d5dd8d2d5ba..1e6aa78e26077 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/fileinput.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/fileinput.pyi @@ -200,7 +200,7 @@ class FileInput(Iterator[AnyStr]): def isfirstline(self) -> bool: ... def isstdin(self) -> bool: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... if sys.version_info >= (3, 10): def hook_compressed( diff --git a/crates/red_knot/vendor/typeshed/stdlib/functools.pyi b/crates/red_knot/vendor/typeshed/stdlib/functools.pyi index 27550cfe08e64..9957fa8f16349 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/functools.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/functools.pyi @@ -132,7 +132,7 @@ class partial(Generic[_T]): def __new__(cls, func: Callable[..., _T], /, *args: Any, **kwargs: Any) -> Self: ... def __call__(self, /, *args: Any, **kwargs: Any) -> _T: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... 
# With protocols, this could change into a generic protocol that defines __get__ and returns _T _Descriptor: TypeAlias = Any @@ -149,7 +149,7 @@ class partialmethod(Generic[_T]): @property def __isabstractmethod__(self) -> bool: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class _SingleDispatchCallable(Generic[_T]): registry: types.MappingProxyType[Any, Callable[..., _T]] @@ -196,7 +196,7 @@ class cached_property(Generic[_T_co]): # __set__ is not defined at runtime, but @cached_property is designed to be settable def __set__(self, instance: object, value: _T_co) -> None: ... # type: ignore[misc] # pyright: ignore[reportGeneralTypeIssues] if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... if sys.version_info >= (3, 9): def cache(user_function: Callable[..., _T], /) -> _lru_cache_wrapper[_T]: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/genericpath.pyi b/crates/red_knot/vendor/typeshed/stdlib/genericpath.pyi index 0dd5dec4b2ec8..9d87c48fd5200 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/genericpath.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/genericpath.pyi @@ -20,6 +20,8 @@ __all__ = [ ] if sys.version_info >= (3, 12): __all__ += ["islink"] +if sys.version_info >= (3, 13): + __all__ += ["isjunction", "isdevdrive", "lexists"] # All overloads can return empty string. Ideally, Literal[""] would be a valid # Iterable[T], so that list[T] | Literal[""] could be used as a return @@ -50,3 +52,8 @@ def getctime(filename: FileDescriptorOrPath) -> float: ... def samefile(f1: FileDescriptorOrPath, f2: FileDescriptorOrPath) -> bool: ... def sameopenfile(fp1: int, fp2: int) -> bool: ... def samestat(s1: os.stat_result, s2: os.stat_result) -> bool: ... + +if sys.version_info >= (3, 13): + def isjunction(path: StrOrBytesPath) -> bool: ... + def isdevdrive(path: StrOrBytesPath) -> bool: ... + def lexists(path: StrOrBytesPath) -> bool: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/graphlib.pyi b/crates/red_knot/vendor/typeshed/stdlib/graphlib.pyi index c02d447ad501d..1ca8cbe12b085 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/graphlib.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/graphlib.pyi @@ -23,6 +23,6 @@ class TopologicalSorter(Generic[_T]): def get_ready(self) -> tuple[_T, ...]: ... def static_order(self) -> Iterable[_T]: ... if sys.version_info >= (3, 11): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class CycleError(ValueError): ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/gzip.pyi b/crates/red_knot/vendor/typeshed/stdlib/gzip.pyi index 7f43795dd01f1..542945698bba0 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/gzip.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/gzip.pyi @@ -12,8 +12,8 @@ _ReadBinaryMode: TypeAlias = Literal["r", "rb"] _WriteBinaryMode: TypeAlias = Literal["a", "ab", "w", "wb", "x", "xb"] _OpenTextMode: TypeAlias = Literal["rt", "at", "wt", "xt"] -READ: Literal[1] # undocumented -WRITE: Literal[2] # undocumented +READ: object # undocumented +WRITE: object # undocumented FTEXT: int # actually Literal[1] # undocumented FHCRC: int # actually Literal[2] # undocumented @@ -86,7 +86,7 @@ class BadGzipFile(OSError): ... 
class GzipFile(_compression.BaseStream): myfileobj: FileIO | None - mode: Literal[1, 2] + mode: object name: str compress: zlib._Compress fileobj: _ReadableFileobj | _WritableFileobj diff --git a/crates/red_knot/vendor/typeshed/stdlib/http/__init__.pyi b/crates/red_knot/vendor/typeshed/stdlib/http/__init__.pyi index bb5737cc04810..d455283948d19 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/http/__init__.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/http/__init__.pyi @@ -1,6 +1,5 @@ import sys from enum import IntEnum -from typing import Literal if sys.version_info >= (3, 11): from enum import StrEnum @@ -49,11 +48,19 @@ class HTTPStatus(IntEnum): GONE = 410 LENGTH_REQUIRED = 411 PRECONDITION_FAILED = 412 + if sys.version_info >= (3, 13): + CONTENT_TOO_LARGE = 413 REQUEST_ENTITY_TOO_LARGE = 413 + if sys.version_info >= (3, 13): + URI_TOO_LONG = 414 REQUEST_URI_TOO_LONG = 414 UNSUPPORTED_MEDIA_TYPE = 415 + if sys.version_info >= (3, 13): + RANGE_NOT_SATISFIABLE = 416 REQUESTED_RANGE_NOT_SATISFIABLE = 416 EXPECTATION_FAILED = 417 + if sys.version_info >= (3, 13): + UNPROCESSABLE_CONTENT = 422 UNPROCESSABLE_ENTITY = 422 LOCKED = 423 FAILED_DEPENDENCY = 424 @@ -75,9 +82,9 @@ class HTTPStatus(IntEnum): MISDIRECTED_REQUEST = 421 UNAVAILABLE_FOR_LEGAL_REASONS = 451 if sys.version_info >= (3, 9): - EARLY_HINTS: Literal[103] - IM_A_TEAPOT: Literal[418] - TOO_EARLY: Literal[425] + EARLY_HINTS = 103 + IM_A_TEAPOT = 418 + TOO_EARLY = 425 if sys.version_info >= (3, 12): @property def is_informational(self) -> bool: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/http/cookies.pyi b/crates/red_knot/vendor/typeshed/stdlib/http/cookies.pyi index 3d19bb108c2db..c4af5256b5d8b 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/http/cookies.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/http/cookies.pyi @@ -45,7 +45,7 @@ class Morsel(dict[str, Any], Generic[_T]): def __eq__(self, morsel: object) -> bool: ... def __setitem__(self, K: str, V: Any) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class BaseCookie(dict[str, Morsel[_T]], Generic[_T]): def __init__(self, input: _DataType | None = None) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/importlib/metadata/__init__.pyi b/crates/red_knot/vendor/typeshed/stdlib/importlib/metadata/__init__.pyi index b2fe147770564..56ee205239508 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/importlib/metadata/__init__.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/importlib/metadata/__init__.pyi @@ -240,7 +240,10 @@ class DistributionFinder(MetaPathFinder): class MetadataPathFinder(DistributionFinder): @classmethod def find_distributions(cls, context: DistributionFinder.Context = ...) -> Iterable[PathDistribution]: ... - if sys.version_info >= (3, 10): + if sys.version_info >= (3, 11): + @classmethod + def invalidate_caches(cls) -> None: ... + elif sys.version_info >= (3, 10): # Yes, this is an instance method that has a parameter named "cls" def invalidate_caches(cls) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/inspect.pyi b/crates/red_knot/vendor/typeshed/stdlib/inspect.pyi index 0abf16d9d0abd..23e0663d0d60c 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/inspect.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/inspect.pyi @@ -318,6 +318,7 @@ class Signature: def bind(self, *args: Any, **kwargs: Any) -> BoundArguments: ... 
def bind_partial(self, *args: Any, **kwargs: Any) -> BoundArguments: ... def replace(self, *, parameters: Sequence[Parameter] | type[_void] | None = ..., return_annotation: Any = ...) -> Self: ... + __replace__ = replace if sys.version_info >= (3, 10): @classmethod def from_callable( @@ -332,6 +333,8 @@ class Signature: else: @classmethod def from_callable(cls, obj: _IntrospectableCallable, *, follow_wrapped: bool = True) -> Self: ... + if sys.version_info >= (3, 13): + def format(self, *, max_width: int | None = None) -> str: ... def __eq__(self, other: object) -> bool: ... def __hash__(self) -> int: ... @@ -392,6 +395,9 @@ class Parameter: default: Any = ..., annotation: Any = ..., ) -> Self: ... + if sys.version_info >= (3, 13): + __replace__ = replace + def __eq__(self, other: object) -> bool: ... def __hash__(self) -> int: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/io.pyi b/crates/red_knot/vendor/typeshed/stdlib/io.pyi index fdbbc8dddce9b..01f3bfc06a27a 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/io.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/io.pyi @@ -75,7 +75,7 @@ class IOBase(metaclass=abc.ABCMeta): def __del__(self) -> None: ... @property def closed(self) -> bool: ... - def _checkClosed(self, msg: str | None = ...) -> None: ... # undocumented + def _checkClosed(self) -> None: ... # undocumented class RawIOBase(IOBase): def readall(self) -> bytes: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/ipaddress.pyi b/crates/red_knot/vendor/typeshed/stdlib/ipaddress.pyi index 98b1893d2a8ae..03decc74e65e7 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/ipaddress.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/ipaddress.pyi @@ -147,7 +147,11 @@ class _BaseV4: @property def max_prefixlen(self) -> Literal[32]: ... -class IPv4Address(_BaseV4, _BaseAddress): ... +class IPv4Address(_BaseV4, _BaseAddress): + if sys.version_info >= (3, 13): + @property + def ipv6_mapped(self) -> IPv6Address: ... + class IPv4Network(_BaseV4, _BaseNetwork[IPv4Address]): ... class IPv4Interface(IPv4Address, _BaseInterface[IPv4Address, IPv4Network]): diff --git a/crates/red_knot/vendor/typeshed/stdlib/itertools.pyi b/crates/red_knot/vendor/typeshed/stdlib/itertools.pyi index 264064dcd682f..16e04829c6cf9 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/itertools.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/itertools.pyi @@ -17,6 +17,10 @@ _T3 = TypeVar("_T3") _T4 = TypeVar("_T4") _T5 = TypeVar("_T5") _T6 = TypeVar("_T6") +_T7 = TypeVar("_T7") +_T8 = TypeVar("_T8") +_T9 = TypeVar("_T9") +_T10 = TypeVar("_T10") _Step: TypeAlias = SupportsFloat | SupportsInt | SupportsIndex | SupportsComplex @@ -214,6 +218,60 @@ class product(Iterator[_T_co]): /, ) -> product[tuple[_T1, _T2, _T3, _T4, _T5, _T6]]: ... @overload + def __new__( + cls, + iter1: Iterable[_T1], + iter2: Iterable[_T2], + iter3: Iterable[_T3], + iter4: Iterable[_T4], + iter5: Iterable[_T5], + iter6: Iterable[_T6], + iter7: Iterable[_T7], + /, + ) -> product[tuple[_T1, _T2, _T3, _T4, _T5, _T6, _T7]]: ... + @overload + def __new__( + cls, + iter1: Iterable[_T1], + iter2: Iterable[_T2], + iter3: Iterable[_T3], + iter4: Iterable[_T4], + iter5: Iterable[_T5], + iter6: Iterable[_T6], + iter7: Iterable[_T7], + iter8: Iterable[_T8], + /, + ) -> product[tuple[_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8]]: ... 
+ @overload + def __new__( + cls, + iter1: Iterable[_T1], + iter2: Iterable[_T2], + iter3: Iterable[_T3], + iter4: Iterable[_T4], + iter5: Iterable[_T5], + iter6: Iterable[_T6], + iter7: Iterable[_T7], + iter8: Iterable[_T8], + iter9: Iterable[_T9], + /, + ) -> product[tuple[_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9]]: ... + @overload + def __new__( + cls, + iter1: Iterable[_T1], + iter2: Iterable[_T2], + iter3: Iterable[_T3], + iter4: Iterable[_T4], + iter5: Iterable[_T5], + iter6: Iterable[_T6], + iter7: Iterable[_T7], + iter8: Iterable[_T8], + iter9: Iterable[_T9], + iter10: Iterable[_T10], + /, + ) -> product[tuple[_T1, _T2, _T3, _T4, _T5, _T6, _T7, _T8, _T9, _T10]]: ... + @overload def __new__(cls, *iterables: Iterable[_T1], repeat: int = 1) -> product[tuple[_T1, ...]]: ... def __iter__(self) -> Self: ... def __next__(self) -> _T_co: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/json/encoder.pyi b/crates/red_knot/vendor/typeshed/stdlib/json/encoder.pyi index c1062688bd93e..473398a60b2a0 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/json/encoder.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/json/encoder.pyi @@ -10,8 +10,8 @@ INFINITY: float def py_encode_basestring(s: str) -> str: ... # undocumented def py_encode_basestring_ascii(s: str) -> str: ... # undocumented -def encode_basestring(s: str) -> str: ... # undocumented -def encode_basestring_ascii(s: str) -> str: ... # undocumented +def encode_basestring(s: str, /) -> str: ... # undocumented +def encode_basestring_ascii(s: str, /) -> str: ... # undocumented class JSONEncoder: item_separator: str diff --git a/crates/red_knot/vendor/typeshed/stdlib/keyword.pyi b/crates/red_knot/vendor/typeshed/stdlib/keyword.pyi index 5eb7aab85317c..960dfd2fa155a 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/keyword.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/keyword.pyi @@ -7,14 +7,14 @@ if sys.version_info >= (3, 9): else: __all__ = ["iskeyword", "kwlist"] -def iskeyword(s: str) -> bool: ... +def iskeyword(s: str, /) -> bool: ... # a list at runtime, but you're not meant to mutate it; # type it as a sequence kwlist: Final[Sequence[str]] if sys.version_info >= (3, 9): - def issoftkeyword(s: str) -> bool: ... + def issoftkeyword(s: str, /) -> bool: ... # a list at runtime, but you're not meant to mutate it; # type it as a sequence diff --git a/crates/red_knot/vendor/typeshed/stdlib/locale.pyi b/crates/red_knot/vendor/typeshed/stdlib/locale.pyi index c18523e043616..58de654495723 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/locale.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/locale.pyi @@ -96,7 +96,6 @@ __all__ = [ "getpreferredencoding", "Error", "setlocale", - "resetlocale", "localeconv", "strcoll", "strxfrm", @@ -121,6 +120,9 @@ if sys.version_info >= (3, 11): if sys.version_info < (3, 12): __all__ += ["format"] +if sys.version_info < (3, 13): + __all__ += ["resetlocale"] + if sys.platform != "win32": __all__ += ["LC_MESSAGES"] @@ -133,7 +135,9 @@ def getlocale(category: int = ...) -> tuple[_str | None, _str | None]: ... def setlocale(category: int, locale: _str | Iterable[_str | None] | None = None) -> _str: ... def getpreferredencoding(do_setlocale: bool = True) -> _str: ... def normalize(localename: _str) -> _str: ... -def resetlocale(category: int = ...) -> None: ... + +if sys.version_info < (3, 13): + def resetlocale(category: int = ...) -> None: ... 
if sys.version_info < (3, 12): def format( diff --git a/crates/red_knot/vendor/typeshed/stdlib/logging/__init__.pyi b/crates/red_knot/vendor/typeshed/stdlib/logging/__init__.pyi index 7ceddfa7ff281..8b19444a5d013 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/logging/__init__.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/logging/__init__.pyi @@ -50,7 +50,6 @@ __all__ = [ "makeLogRecord", "setLoggerClass", "shutdown", - "warn", "warning", "getLogRecordFactory", "setLogRecordFactory", @@ -58,6 +57,8 @@ __all__ = [ "raiseExceptions", ] +if sys.version_info < (3, 13): + __all__ += ["warn"] if sys.version_info >= (3, 11): __all__ += ["getLevelNamesMapping"] if sys.version_info >= (3, 12): @@ -156,15 +157,17 @@ class Logger(Filterer): stacklevel: int = 1, extra: Mapping[str, object] | None = None, ) -> None: ... - def warn( - self, - msg: object, - *args: object, - exc_info: _ExcInfoType = None, - stack_info: bool = False, - stacklevel: int = 1, - extra: Mapping[str, object] | None = None, - ) -> None: ... + if sys.version_info < (3, 13): + def warn( + self, + msg: object, + *args: object, + exc_info: _ExcInfoType = None, + stack_info: bool = False, + stacklevel: int = 1, + extra: Mapping[str, object] | None = None, + ) -> None: ... + def error( self, msg: object, @@ -365,12 +368,18 @@ _L = TypeVar("_L", bound=Logger | LoggerAdapter[Any]) class LoggerAdapter(Generic[_L]): logger: _L manager: Manager # undocumented + + if sys.version_info >= (3, 13): + def __init__(self, logger: _L, extra: Mapping[str, object] | None = None, merge_extra: bool = False) -> None: ... + elif sys.version_info >= (3, 10): + def __init__(self, logger: _L, extra: Mapping[str, object] | None = None) -> None: ... + else: + def __init__(self, logger: _L, extra: Mapping[str, object]) -> None: ... + if sys.version_info >= (3, 10): extra: Mapping[str, object] | None - def __init__(self, logger: _L, extra: Mapping[str, object] | None = None) -> None: ... else: extra: Mapping[str, object] - def __init__(self, logger: _L, extra: Mapping[str, object]) -> None: ... def process(self, msg: Any, kwargs: MutableMapping[str, Any]) -> tuple[Any, MutableMapping[str, Any]]: ... def debug( @@ -403,16 +412,18 @@ class LoggerAdapter(Generic[_L]): extra: Mapping[str, object] | None = None, **kwargs: object, ) -> None: ... - def warn( - self, - msg: object, - *args: object, - exc_info: _ExcInfoType = None, - stack_info: bool = False, - stacklevel: int = 1, - extra: Mapping[str, object] | None = None, - **kwargs: object, - ) -> None: ... + if sys.version_info < (3, 13): + def warn( + self, + msg: object, + *args: object, + exc_info: _ExcInfoType = None, + stack_info: bool = False, + stacklevel: int = 1, + extra: Mapping[str, object] | None = None, + **kwargs: object, + ) -> None: ... + def error( self, msg: object, @@ -458,19 +469,32 @@ class LoggerAdapter(Generic[_L]): def getEffectiveLevel(self) -> int: ... def setLevel(self, level: _Level) -> None: ... def hasHandlers(self) -> bool: ... - def _log( - self, - level: int, - msg: object, - args: _ArgsType, - exc_info: _ExcInfoType | None = None, - extra: Mapping[str, object] | None = None, - stack_info: bool = False, - ) -> None: ... # undocumented + if sys.version_info >= (3, 11): + def _log( + self, + level: int, + msg: object, + args: _ArgsType, + *, + exc_info: _ExcInfoType | None = None, + extra: Mapping[str, object] | None = None, + stack_info: bool = False, + ) -> None: ... 
# undocumented + else: + def _log( + self, + level: int, + msg: object, + args: _ArgsType, + exc_info: _ExcInfoType | None = None, + extra: Mapping[str, object] | None = None, + stack_info: bool = False, + ) -> None: ... # undocumented + @property def name(self) -> str: ... # undocumented if sys.version_info >= (3, 11): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... def getLogger(name: str | None = None) -> Logger: ... def getLoggerClass() -> type[Logger]: ... @@ -499,14 +523,17 @@ def warning( stacklevel: int = 1, extra: Mapping[str, object] | None = None, ) -> None: ... -def warn( - msg: object, - *args: object, - exc_info: _ExcInfoType = None, - stack_info: bool = False, - stacklevel: int = 1, - extra: Mapping[str, object] | None = None, -) -> None: ... + +if sys.version_info < (3, 13): + def warn( + msg: object, + *args: object, + exc_info: _ExcInfoType = None, + stack_info: bool = False, + stacklevel: int = 1, + extra: Mapping[str, object] | None = None, + ) -> None: ... + def error( msg: object, *args: object, @@ -600,7 +627,7 @@ class StreamHandler(Handler, Generic[_StreamT]): def __init__(self: StreamHandler[_StreamT], stream: _StreamT) -> None: ... # pyright: ignore[reportInvalidTypeVarUse] #11780 def setStream(self, stream: _StreamT) -> _StreamT | None: ... if sys.version_info >= (3, 11): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class FileHandler(StreamHandler[TextIOWrapper]): baseFilename: str # undocumented diff --git a/crates/red_knot/vendor/typeshed/stdlib/logging/handlers.pyi b/crates/red_knot/vendor/typeshed/stdlib/logging/handlers.pyi index 4c3dc913308cc..4e97012abba11 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/logging/handlers.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/logging/handlers.pyi @@ -46,7 +46,7 @@ class BaseRotatingHandler(FileHandler): def rotate(self, source: str, dest: str) -> None: ... class RotatingFileHandler(BaseRotatingHandler): - maxBytes: str # undocumented + maxBytes: int # undocumented backupCount: int # undocumented if sys.version_info >= (3, 9): def __init__( diff --git a/crates/red_knot/vendor/typeshed/stdlib/mailbox.pyi b/crates/red_knot/vendor/typeshed/stdlib/mailbox.pyi index 1059bfe917e80..2f43f9552652c 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/mailbox.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/mailbox.pyi @@ -102,7 +102,7 @@ class Mailbox(Generic[_MessageT]): @abstractmethod def close(self) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class Maildir(Mailbox[MaildirMessage]): colon: str @@ -244,7 +244,7 @@ class _ProxyFile(Generic[AnyStr]): @property def closed(self) -> bool: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class _PartialFile(_ProxyFile[AnyStr]): def __init__(self, f: IO[AnyStr], start: int | None = None, stop: int | None = None) -> None: ... 
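Note on the logging changes above: the `warn` aliases are now guarded with `sys.version_info < (3, 13)`, matching their removal from CPython 3.13 after more than a decade of deprecation. A minimal sketch of the supported spelling (the logger name is illustrative):

    import logging

    logger = logging.getLogger("app")  # hypothetical logger name

    # Logger.warn and module-level logging.warn no longer exist on 3.13;
    # warning() has been the documented spelling since Python 3.3.
    logger.warning("disk usage at %d%%", 93)
    logging.warning("falling back to defaults")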
diff --git a/crates/red_knot/vendor/typeshed/stdlib/marshal.pyi b/crates/red_knot/vendor/typeshed/stdlib/marshal.pyi index 69546344f5bf9..6ab202637ddaa 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/marshal.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/marshal.pyi @@ -1,4 +1,5 @@ import builtins +import sys import types from _typeshed import ReadableBuffer, SupportsRead, SupportsWrite from typing import Any @@ -27,7 +28,14 @@ _Marshallable: TypeAlias = ( | ReadableBuffer ) -def dump(value: _Marshallable, file: SupportsWrite[bytes], version: int = 4, /) -> None: ... -def load(file: SupportsRead[bytes], /) -> Any: ... -def dumps(value: _Marshallable, version: int = 4, /) -> bytes: ... -def loads(bytes: ReadableBuffer, /) -> Any: ... +if sys.version_info >= (3, 13): + def dump(value: _Marshallable, file: SupportsWrite[bytes], version: int = 4, /, *, allow_code: bool = True) -> None: ... + def load(file: SupportsRead[bytes], /, *, allow_code: bool = True) -> Any: ... + def dumps(value: _Marshallable, version: int = 4, /, *, allow_code: bool = True) -> bytes: ... + def loads(bytes: ReadableBuffer, /, *, allow_code: bool = True) -> Any: ... + +else: + def dump(value: _Marshallable, file: SupportsWrite[bytes], version: int = 4, /) -> None: ... + def load(file: SupportsRead[bytes], /) -> Any: ... + def dumps(value: _Marshallable, version: int = 4, /) -> bytes: ... + def loads(bytes: ReadableBuffer, /) -> Any: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/math.pyi b/crates/red_knot/vendor/typeshed/stdlib/math.pyi index 0c2fd4aba7192..2bb61e0669b4d 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/math.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/math.pyi @@ -123,3 +123,6 @@ def trunc(x: _SupportsTrunc[_T], /) -> _T: ... if sys.version_info >= (3, 9): def ulp(x: _SupportsFloatOrIndex, /) -> float: ... + +if sys.version_info >= (3, 13): + def fma(x: _SupportsFloatOrIndex, y: _SupportsFloatOrIndex, z: _SupportsFloatOrIndex, /) -> float: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/mimetypes.pyi b/crates/red_knot/vendor/typeshed/stdlib/mimetypes.pyi index e74b214d3ff10..517193e3516f1 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/mimetypes.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/mimetypes.pyi @@ -1,3 +1,4 @@ +import sys from _typeshed import StrPath from collections.abc import Sequence from typing import IO @@ -18,6 +19,9 @@ __all__ = [ "common_types", ] +if sys.version_info >= (3, 13): + __all__ += ["guess_file_type"] + def guess_type(url: StrPath, strict: bool = True) -> tuple[str | None, str | None]: ... def guess_all_extensions(type: str, strict: bool = True) -> list[str]: ... def guess_extension(type: str, strict: bool = True) -> str | None: ... @@ -25,6 +29,9 @@ def init(files: Sequence[str] | None = None) -> None: ... def read_mime_types(file: str) -> dict[str, str] | None: ... def add_type(type: str, ext: str, strict: bool = True) -> None: ... +if sys.version_info >= (3, 13): + def guess_file_type(path: StrPath, *, strict: bool = True) -> tuple[str | None, str | None]: ... + inited: bool knownfiles: list[str] suffix_map: dict[str, str] @@ -44,3 +51,5 @@ class MimeTypes: def read(self, filename: str, strict: bool = True) -> None: ... def readfp(self, fp: IO[str], strict: bool = True) -> None: ... def read_windows_registry(self, strict: bool = True) -> None: ... + if sys.version_info >= (3, 13): + def guess_file_type(self, path: StrPath, *, strict: bool = True) -> tuple[str | None, str | None]: ... 
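The marshal stubs above add the 3.13-only `allow_code` keyword, which lets callers refuse to (de)serialize code objects. A minimal sketch, runnable only on Python 3.13+ where the keyword exists:

    import marshal

    payload = marshal.dumps({"answer": 42})  # plain data round-trips as before
    assert marshal.loads(payload, allow_code=False) == {"answer": 42}

    def f(): ...

    try:
        marshal.dumps(f.__code__, allow_code=False)
    except ValueError:
        # marshal reports unsupported types with ValueError, and with
        # allow_code=False a code object counts as unsupported.
        print("code objects disallowed")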
diff --git a/crates/red_knot/vendor/typeshed/stdlib/mmap.pyi b/crates/red_knot/vendor/typeshed/stdlib/mmap.pyi index 93c4f408e5b6d..2d27e7b2acb2c 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/mmap.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/mmap.pyi @@ -76,6 +76,8 @@ class mmap(Iterable[int], Sized): def __exit__(self, *args: Unused) -> None: ... def __buffer__(self, flags: int, /) -> memoryview: ... def __release_buffer__(self, buffer: memoryview, /) -> None: ... + if sys.version_info >= (3, 13): + def seekable(self) -> bool: ... if sys.platform != "win32": MADV_NORMAL: int diff --git a/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/managers.pyi b/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/managers.pyi index 02b5c4bc8c676..5d5b9cdcb9135 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/managers.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/managers.pyi @@ -58,7 +58,7 @@ class ValueProxy(BaseProxy, Generic[_T]): def set(self, value: _T) -> None: ... value: _T if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class DictProxy(BaseProxy, MutableMapping[_KT, _VT]): __builtins__: ClassVar[dict[str, Any]] @@ -83,6 +83,8 @@ class DictProxy(BaseProxy, MutableMapping[_KT, _VT]): def keys(self) -> list[_KT]: ... # type: ignore[override] def items(self) -> list[tuple[_KT, _VT]]: ... # type: ignore[override] def values(self) -> list[_VT]: ... # type: ignore[override] + if sys.version_info >= (3, 13): + def __class_getitem__(cls, args: Any, /) -> Any: ... class BaseListProxy(BaseProxy, MutableSequence[_T]): __builtins__: ClassVar[dict[str, Any]] @@ -117,6 +119,8 @@ class BaseListProxy(BaseProxy, MutableSequence[_T]): class ListProxy(BaseListProxy[_T]): def __iadd__(self, value: Iterable[_T], /) -> Self: ... # type: ignore[override] def __imul__(self, value: SupportsIndex, /) -> Self: ... # type: ignore[override] + if sys.version_info >= (3, 13): + def __class_getitem__(cls, args: Any, /) -> Any: ... # Returned by BaseManager.get_server() class Server: diff --git a/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/pool.pyi b/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/pool.pyi index 465c8e08c1347..d2d611e3ca622 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/pool.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/pool.pyi @@ -21,7 +21,7 @@ class ApplyResult(Generic[_T]): def ready(self) -> bool: ... def successful(self) -> bool: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... # alias created during issue #17805 AsyncResult = ApplyResult diff --git a/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/queues.pyi b/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/queues.pyi index 4cedd665552ac..581a46ea0bc85 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/queues.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/queues.pyi @@ -38,4 +38,4 @@ class SimpleQueue(Generic[_T]): def get(self) -> _T: ... def put(self, obj: _T) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... 
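Per the managers stubs above, `DictProxy` and `ListProxy` gain a real `__class_getitem__` on 3.13, so the proxy classes can be subscripted at runtime rather than only inside quoted annotations. A short sketch assuming Python 3.13:

    import sys
    from multiprocessing.managers import ListProxy

    def total(items: "ListProxy[int]") -> int:  # quoted form works everywhere
        return sum(items)

    if sys.version_info >= (3, 13):
        IntListProxy = ListProxy[int]           # runtime subscription, new in 3.13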
diff --git a/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/shared_memory.pyi b/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/shared_memory.pyi index adbe8b943de6c..b63cedf858676 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/shared_memory.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/multiprocessing/shared_memory.pyi @@ -11,7 +11,11 @@ __all__ = ["SharedMemory", "ShareableList"] _SLT = TypeVar("_SLT", int, float, bool, str, bytes, None) class SharedMemory: - def __init__(self, name: str | None = None, create: bool = False, size: int = 0) -> None: ... + if sys.version_info >= (3, 13): + def __init__(self, name: str | None = None, create: bool = False, size: int = 0, *, track: bool = True) -> None: ... + else: + def __init__(self, name: str | None = None, create: bool = False, size: int = 0) -> None: ... + @property def buf(self) -> memoryview: ... @property @@ -37,4 +41,4 @@ class ShareableList(Generic[_SLT]): def count(self, value: _SLT) -> int: ... def index(self, value: _SLT) -> int: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/ntpath.pyi b/crates/red_knot/vendor/typeshed/stdlib/ntpath.pyi index 079366018bf54..ebe305ef708c2 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/ntpath.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/ntpath.pyi @@ -1,5 +1,5 @@ import sys -from _typeshed import BytesPath, StrPath +from _typeshed import BytesPath, StrOrBytesPath, StrPath from genericpath import ( commonprefix as commonprefix, exists as exists, @@ -47,6 +47,8 @@ from typing_extensions import LiteralString if sys.version_info >= (3, 12): from posixpath import isjunction as isjunction, splitroot as splitroot +if sys.version_info >= (3, 13): + from genericpath import isdevdrive as isdevdrive __all__ = [ "normcase", @@ -90,6 +92,8 @@ __all__ = [ ] if sys.version_info >= (3, 12): __all__ += ["isjunction", "splitroot"] +if sys.version_info >= (3, 13): + __all__ += ["isdevdrive", "isreserved"] altsep: LiteralString @@ -117,3 +121,6 @@ if sys.platform == "win32": else: realpath = abspath + +if sys.version_info >= (3, 13): + def isreserved(path: StrOrBytesPath) -> bool: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/opcode.pyi b/crates/red_knot/vendor/typeshed/stdlib/opcode.pyi index 14bdb76221429..f9f76962f8765 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/opcode.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/opcode.pyi @@ -20,6 +20,8 @@ if sys.version_info >= (3, 12): __all__ += ["hasarg", "hasexc"] else: __all__ += ["hasnargs"] +if sys.version_info >= (3, 13): + __all__ += ["hasjump"] if sys.version_info >= (3, 9): cmp_op: tuple[Literal["<"], Literal["<="], Literal["=="], Literal["!="], Literal[">"], Literal[">="]] @@ -50,10 +52,12 @@ if sys.version_info >= (3, 12): hasexc: list[int] else: hasnargs: list[int] +if sys.version_info >= (3, 13): + hasjump: list[int] opname: list[str] opmap: dict[str, int] -HAVE_ARGUMENT: Literal[90] -EXTENDED_ARG: Literal[144] +HAVE_ARGUMENT: int +EXTENDED_ARG: int def stack_effect(opcode: int, oparg: int | None = None, /, *, jump: bool | None = None) -> int: ... 
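The `SharedMemory` change above adds a keyword-only `track` flag on 3.13: passing `track=False` keeps the segment out of this process's resource tracker, which is the right call when another process owns cleanup. Sketch with an illustrative segment name:

    from multiprocessing import shared_memory

    # Attach to a segment created elsewhere; track=False (3.13+) means this
    # process will not try to unlink it at interpreter exit.
    seg = shared_memory.SharedMemory(name="telemetry", track=False)  # hypothetical name
    try:
        first_byte = seg.buf[0]
    finally:
        seg.close()  # detach locally; the creating process calls unlink()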
diff --git a/crates/red_knot/vendor/typeshed/stdlib/optparse.pyi b/crates/red_knot/vendor/typeshed/stdlib/optparse.pyi index 3474648617c2d..a179c2d1bb3ce 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/optparse.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/optparse.pyi @@ -151,7 +151,7 @@ class OptionContainer: def _create_option_mappings(self) -> None: ... def _share_option_mappings(self, parser: OptionParser) -> None: ... @overload - def add_option(self, opt: Option) -> Option: ... + def add_option(self, opt: Option, /) -> Option: ... @overload def add_option(self, arg: str, /, *args: str | None, **kwargs) -> Option: ... def add_options(self, option_list: Iterable[Option]) -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/os/__init__.pyi b/crates/red_knot/vendor/typeshed/stdlib/os/__init__.pyi index e1c7855c0bb64..31c5d2aa3ee6b 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/os/__init__.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/os/__init__.pyi @@ -437,7 +437,7 @@ class DirEntry(Generic[AnyStr]): def stat(self, *, follow_symlinks: bool = True) -> stat_result: ... def __fspath__(self) -> AnyStr: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... if sys.version_info >= (3, 12): def is_junction(self) -> bool: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/pathlib.pyi b/crates/red_knot/vendor/typeshed/stdlib/pathlib.pyi index 0013e221f2e1b..c8c8dde0f33e3 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/pathlib.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/pathlib.pyi @@ -1,4 +1,5 @@ import sys +import types from _typeshed import ( OpenBinaryMode, OpenBinaryModeReading, @@ -14,7 +15,7 @@ from collections.abc import Callable, Generator, Iterator, Sequence from io import BufferedRandom, BufferedReader, BufferedWriter, FileIO, TextIOWrapper from os import PathLike, stat_result from types import TracebackType -from typing import IO, Any, BinaryIO, Literal, overload +from typing import IO, Any, BinaryIO, ClassVar, Literal, overload from typing_extensions import Self, deprecated if sys.version_info >= (3, 9): @@ -22,7 +23,14 @@ if sys.version_info >= (3, 9): __all__ = ["PurePath", "PurePosixPath", "PureWindowsPath", "Path", "PosixPath", "WindowsPath"] +if sys.version_info >= (3, 13): + __all__ += ["UnsupportedOperation"] + class PurePath(PathLike[str]): + if sys.version_info >= (3, 13): + parser: ClassVar[types.ModuleType] + def full_match(self, pattern: StrPath, *, case_sensitive: bool | None = None) -> bool: ... + @property def parts(self) -> tuple[str, ...]: ... @property @@ -94,8 +102,6 @@ class PureWindowsPath(PurePath): ... class Path(PurePath): def __new__(cls, *args: StrPath, **kwargs: Any) -> Self: ... - def __enter__(self) -> Self: ... - def __exit__(self, t: type[BaseException] | None, v: BaseException | None, tb: TracebackType | None) -> None: ... @classmethod def cwd(cls) -> Self: ... if sys.version_info >= (3, 10): @@ -105,17 +111,38 @@ class Path(PurePath): def stat(self) -> stat_result: ... def chmod(self, mode: int) -> None: ... - if sys.version_info >= (3, 12): - def exists(self, *, follow_symlinks: bool = True) -> bool: ... + if sys.version_info >= (3, 13): + @classmethod + def from_uri(cls, uri: str) -> Path: ... + def is_dir(self, *, follow_symlinks: bool = True) -> bool: ... + def is_file(self, *, follow_symlinks: bool = True) -> bool: ... 
+ def read_text(self, encoding: str | None = None, errors: str | None = None, newline: str | None = None) -> str: ... + else: + def __enter__(self) -> Self: ... + def __exit__(self, t: type[BaseException] | None, v: BaseException | None, tb: TracebackType | None) -> None: ... + def is_dir(self) -> bool: ... + def is_file(self) -> bool: ... + def read_text(self, encoding: str | None = None, errors: str | None = None) -> str: ... + + if sys.version_info >= (3, 13): + def glob( + self, pattern: str, *, case_sensitive: bool | None = None, recurse_symlinks: bool = False + ) -> Generator[Self, None, None]: ... + def rglob( + self, pattern: str, *, case_sensitive: bool | None = None, recurse_symlinks: bool = False + ) -> Generator[Self, None, None]: ... + elif sys.version_info >= (3, 12): def glob(self, pattern: str, *, case_sensitive: bool | None = None) -> Generator[Self, None, None]: ... def rglob(self, pattern: str, *, case_sensitive: bool | None = None) -> Generator[Self, None, None]: ... else: - def exists(self) -> bool: ... def glob(self, pattern: str) -> Generator[Self, None, None]: ... def rglob(self, pattern: str) -> Generator[Self, None, None]: ... - def is_dir(self) -> bool: ... - def is_file(self) -> bool: ... + if sys.version_info >= (3, 12): + def exists(self, *, follow_symlinks: bool = True) -> bool: ... + else: + def exists(self) -> bool: ... + def is_symlink(self) -> bool: ... def is_socket(self) -> bool: ... def is_fifo(self) -> bool: ... @@ -186,8 +213,12 @@ class Path(PurePath): if sys.platform != "win32": # These methods do "exist" on Windows, but they always raise NotImplementedError, # so it's safer to pretend they don't exist - def owner(self) -> str: ... - def group(self) -> str: ... + if sys.version_info >= (3, 13): + def owner(self, *, follow_symlinks: bool = True) -> str: ... + def group(self, *, follow_symlinks: bool = True) -> str: ... + else: + def owner(self) -> str: ... + def group(self) -> str: ... # This method does "exist" on Windows on <3.12, but always raises NotImplementedError # On py312+, it works properly on Windows, as with all other platforms @@ -212,7 +243,6 @@ class Path(PurePath): def absolute(self) -> Self: ... def expanduser(self) -> Self: ... def read_bytes(self) -> bytes: ... - def read_text(self, encoding: str | None = None, errors: str | None = None) -> str: ... def samefile(self, other_path: StrPath) -> bool: ... def write_bytes(self, data: ReadableBuffer) -> int: ... if sys.version_info >= (3, 10): @@ -234,3 +264,6 @@ class Path(PurePath): class PosixPath(Path, PurePosixPath): ... class WindowsPath(Path, PureWindowsPath): ... + +if sys.version_info >= (3, 13): + class UnsupportedOperation(NotImplementedError): ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/pdb.pyi b/crates/red_knot/vendor/typeshed/stdlib/pdb.pyi index 4cc708d9d5fe9..487adddd04bf0 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/pdb.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/pdb.pyi @@ -55,7 +55,9 @@ class Pdb(Bdb, Cmd): ) -> None: ... def forget(self) -> None: ... def setup(self, f: FrameType | None, tb: TracebackType | None) -> None: ... - def execRcLines(self) -> None: ... + if sys.version_info < (3, 11): + def execRcLines(self) -> None: ... + def bp_commands(self, frame: FrameType) -> bool: ... def interaction(self, frame: FrameType | None, traceback: TracebackType | None) -> None: ... def displayhook(self, obj: object) -> None: ... 
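Several of the pathlib additions above are 3.13-only: `Path.from_uri`, `PurePath.full_match`, and the `recurse_symlinks` flag on `glob`/`rglob`. A quick tour under that assumption (POSIX paths, names illustrative):

    import sys
    from pathlib import Path

    if sys.version_info >= (3, 13):
        p = Path.from_uri("file:///tmp/logs/app.log")  # hypothetical path
        assert p.full_match("/tmp/logs/*.log")         # pattern must cover the whole path
        hits = list(Path("/tmp").glob("**/*.log", recurse_symlinks=True))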
diff --git a/crates/red_knot/vendor/typeshed/stdlib/platform.pyi b/crates/red_knot/vendor/typeshed/stdlib/platform.pyi index f0e6d4123e1dc..c47ecdc51df49 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/platform.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/platform.pyi @@ -40,3 +40,28 @@ def platform(aliased: bool = ..., terse: bool = ...) -> str: ... if sys.version_info >= (3, 10): def freedesktop_os_release() -> dict[str, str]: ... + +if sys.version_info >= (3, 13): + class AndroidVer(NamedTuple): + release: str + api_level: int + manufacturer: str + model: str + device: str + is_emulator: bool + + class IOSVersionInfo(NamedTuple): + system: str + release: str + model: str + is_simulator: bool + + def android_ver( + release: str = "", + api_level: int = 0, + manufacturer: str = "", + model: str = "", + device: str = "", + is_emulator: bool = False, + ) -> AndroidVer: ... + def ios_ver(system: str = "", release: str = "", model: str = "", is_simulator: bool = False) -> IOSVersionInfo: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/posixpath.pyi b/crates/red_knot/vendor/typeshed/stdlib/posixpath.pyi index 1fc471ac7d0bc..e5f5fa0d813c1 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/posixpath.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/posixpath.pyi @@ -14,6 +14,9 @@ from genericpath import ( sameopenfile as sameopenfile, samestat as samestat, ) + +if sys.version_info >= (3, 13): + from genericpath import isdevdrive as isdevdrive from os import PathLike from typing import AnyStr, overload from typing_extensions import LiteralString @@ -60,6 +63,8 @@ __all__ = [ ] if sys.version_info >= (3, 12): __all__ += ["isjunction", "splitroot"] +if sys.version_info >= (3, 13): + __all__ += ["isdevdrive"] supports_unicode_filenames: bool # aliases (also in os) diff --git a/crates/red_knot/vendor/typeshed/stdlib/pydoc.pyi b/crates/red_knot/vendor/typeshed/stdlib/pydoc.pyi index 3134de79352d8..1a90eb30efca4 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/pydoc.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/pydoc.pyi @@ -1,5 +1,5 @@ import sys -from _typeshed import OptExcInfo, SupportsWrite +from _typeshed import OptExcInfo, SupportsWrite, Unused from abc import abstractmethod from builtins import list as _list # "list" conflicts with method name from collections.abc import Callable, Container, Mapping, MutableMapping @@ -121,7 +121,7 @@ class HTMLDoc(Doc): def formattree( self, tree: list[tuple[type, tuple[type, ...]] | list[Any]], modname: str, parent: type | None = None ) -> str: ... - def docmodule(self, object: object, name: str | None = None, mod: str | None = None, *ignored: Any) -> str: ... + def docmodule(self, object: object, name: str | None = None, mod: str | None = None, *ignored: Unused) -> str: ... def docclass( self, object: object, @@ -129,22 +129,44 @@ class HTMLDoc(Doc): mod: str | None = None, funcs: Mapping[str, str] = {}, classes: Mapping[str, str] = {}, - *ignored: Any, + *ignored: Unused, ) -> str: ... def formatvalue(self, object: object) -> str: ... - def docroutine( # type: ignore[override] - self, - object: object, - name: str | None = None, - mod: str | None = None, - funcs: Mapping[str, str] = {}, - classes: Mapping[str, str] = {}, - methods: Mapping[str, str] = {}, - cl: type | None = None, - ) -> str: ... - def docproperty(self, object: object, name: str | None = None, mod: str | None = None, cl: Any | None = None) -> str: ... 
# type: ignore[override] - def docother(self, object: object, name: str | None = None, mod: Any | None = None, *ignored: Any) -> str: ... - def docdata(self, object: object, name: str | None = None, mod: Any | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] + def docother(self, object: object, name: str | None = None, mod: Any | None = None, *ignored: Unused) -> str: ... + if sys.version_info >= (3, 11): + def docroutine( # type: ignore[override] + self, + object: object, + name: str | None = None, + mod: str | None = None, + funcs: Mapping[str, str] = {}, + classes: Mapping[str, str] = {}, + methods: Mapping[str, str] = {}, + cl: type | None = None, + homecls: type | None = None, + ) -> str: ... + def docproperty( + self, object: object, name: str | None = None, mod: str | None = None, cl: Any | None = None, *ignored: Unused + ) -> str: ... + def docdata( + self, object: object, name: str | None = None, mod: Any | None = None, cl: Any | None = None, *ignored: Unused + ) -> str: ... + else: + def docroutine( # type: ignore[override] + self, + object: object, + name: str | None = None, + mod: str | None = None, + funcs: Mapping[str, str] = {}, + classes: Mapping[str, str] = {}, + methods: Mapping[str, str] = {}, + cl: type | None = None, + ) -> str: ... + def docproperty(self, object: object, name: str | None = None, mod: str | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] + def docdata(self, object: object, name: str | None = None, mod: Any | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] + if sys.version_info >= (3, 11): + def parentlink(self, object: type | ModuleType, modname: str) -> str: ... + def index(self, dir: str, shadowed: MutableMapping[str, bool] | None = None) -> str: ... def filelink(self, url: str, path: str) -> str: ... @@ -164,21 +186,48 @@ class TextDoc(Doc): def formattree( self, tree: list[tuple[type, tuple[type, ...]] | list[Any]], modname: str, parent: type | None = None, prefix: str = "" ) -> str: ... - def docmodule(self, object: object, name: str | None = None, mod: Any | None = None) -> str: ... # type: ignore[override] - def docclass(self, object: object, name: str | None = None, mod: str | None = None, *ignored: Any) -> str: ... + def docclass(self, object: object, name: str | None = None, mod: str | None = None, *ignored: Unused) -> str: ... def formatvalue(self, object: object) -> str: ... - def docroutine(self, object: object, name: str | None = None, mod: str | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] - def docproperty(self, object: object, name: str | None = None, mod: Any | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] - def docdata(self, object: object, name: str | None = None, mod: str | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] - def docother( # type: ignore[override] - self, - object: object, - name: str | None = None, - mod: str | None = None, - parent: str | None = None, - maxlen: int | None = None, - doc: Any | None = None, - ) -> str: ... + if sys.version_info >= (3, 11): + def docroutine( # type: ignore[override] + self, + object: object, + name: str | None = None, + mod: str | None = None, + cl: Any | None = None, + homecls: Any | None = None, + ) -> str: ... + def docmodule(self, object: object, name: str | None = None, mod: Any | None = None, *ignored: Unused) -> str: ... 
+ def docproperty( + self, object: object, name: str | None = None, mod: Any | None = None, cl: Any | None = None, *ignored: Unused + ) -> str: ... + def docdata( + self, object: object, name: str | None = None, mod: str | None = None, cl: Any | None = None, *ignored: Unused + ) -> str: ... + def docother( + self, + object: object, + name: str | None = None, + mod: str | None = None, + parent: str | None = None, + *ignored: Unused, + maxlen: int | None = None, + doc: Any | None = None, + ) -> str: ... + else: + def docroutine(self, object: object, name: str | None = None, mod: str | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] + def docmodule(self, object: object, name: str | None = None, mod: Any | None = None) -> str: ... # type: ignore[override] + def docproperty(self, object: object, name: str | None = None, mod: Any | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] + def docdata(self, object: object, name: str | None = None, mod: str | None = None, cl: Any | None = None) -> str: ... # type: ignore[override] + def docother( # type: ignore[override] + self, + object: object, + name: str | None = None, + mod: str | None = None, + parent: str | None = None, + maxlen: int | None = None, + doc: Any | None = None, + ) -> str: ... def pager(text: str) -> None: ... def getpager() -> Callable[[str], None]: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/queue.pyi b/crates/red_knot/vendor/typeshed/stdlib/queue.pyi index d7cae5f2ac79a..16643c99d08df 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/queue.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/queue.pyi @@ -6,6 +6,8 @@ if sys.version_info >= (3, 9): from types import GenericAlias __all__ = ["Empty", "Full", "Queue", "PriorityQueue", "LifoQueue", "SimpleQueue"] +if sys.version_info >= (3, 13): + __all__ += ["ShutDown"] _T = TypeVar("_T") @@ -46,7 +48,7 @@ class Queue(Generic[_T]): def _qsize(self) -> int: ... def task_done(self) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class PriorityQueue(Queue[_T]): queue: list[_T] @@ -63,4 +65,4 @@ class SimpleQueue(Generic[_T]): def put_nowait(self, item: _T) -> None: ... def qsize(self) -> int: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/random.pyi b/crates/red_knot/vendor/typeshed/stdlib/random.pyi index 9fd1c64f2bba2..e7320369c3778 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/random.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/random.pyi @@ -41,7 +41,10 @@ _T = TypeVar("_T") class Random(_random.Random): VERSION: ClassVar[int] - def __init__(self, x: Any = None) -> None: ... + if sys.version_info >= (3, 9): + def __init__(self, x: int | float | str | bytes | bytearray | None = None) -> None: ... # noqa: Y041 + else: + def __init__(self, x: Any = None) -> None: ... # Using other `seed` types is deprecated since 3.9 and removed in 3.11 # Ignore Y041, since random.seed doesn't treat int like a float subtype. Having an explicit # int better documents conventional usage of random.seed. 
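`queue` now exports `ShutDown` on 3.13 (see the `__all__` change above); it is the exception raised by the 3.13 queue-shutdown protocol once a shut-down queue runs dry. A sketch of the consumer side, assuming the 3.13 runtime API:

    import queue
    import threading

    q = queue.Queue()

    def worker() -> None:
        while True:
            try:
                item = q.get()
            except queue.ShutDown:  # queue was shut down and is drained
                return
            print("got", item)
            q.task_done()

    t = threading.Thread(target=worker)
    t.start()
    q.put(1)
    q.shutdown()                    # 3.13+: wakes blocked consumers
    t.join()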
diff --git a/crates/red_knot/vendor/typeshed/stdlib/re.pyi b/crates/red_knot/vendor/typeshed/stdlib/re.pyi index 7945c5f46cdc5..b06f494c0b7d6 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/re.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/re.pyi @@ -1,5 +1,6 @@ import enum import sre_compile +import sre_constants import sys from _typeshed import ReadableBuffer from collections.abc import Callable, Iterator, Mapping @@ -21,7 +22,6 @@ __all__ = [ "finditer", "compile", "purge", - "template", "escape", "error", "A", @@ -41,10 +41,17 @@ __all__ = [ "Match", "Pattern", ] +if sys.version_info < (3, 13): + __all__ += ["template"] if sys.version_info >= (3, 11): __all__ += ["NOFLAG", "RegexFlag"] +if sys.version_info >= (3, 13): + __all__ += ["PatternError"] + + PatternError = sre_constants.error + _T = TypeVar("_T") @final @@ -102,7 +109,7 @@ class Match(Generic[AnyStr]): def __copy__(self) -> Match[AnyStr]: ... def __deepcopy__(self, memo: Any, /) -> Match[AnyStr]: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... @final class Pattern(Generic[AnyStr]): @@ -178,7 +185,7 @@ class Pattern(Generic[AnyStr]): def __eq__(self, value: object, /) -> bool: ... def __hash__(self) -> int: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... # ----- re variables and constants ----- @@ -198,10 +205,11 @@ class RegexFlag(enum.IntFlag): VERBOSE = X U = sre_compile.SRE_FLAG_UNICODE UNICODE = U - T = sre_compile.SRE_FLAG_TEMPLATE - TEMPLATE = T + if sys.version_info < (3, 13): + T = sre_compile.SRE_FLAG_TEMPLATE + TEMPLATE = T if sys.version_info >= (3, 11): - NOFLAG: int + NOFLAG = 0 A = RegexFlag.A ASCII = RegexFlag.ASCII @@ -218,8 +226,9 @@ X = RegexFlag.X VERBOSE = RegexFlag.VERBOSE U = RegexFlag.U UNICODE = RegexFlag.UNICODE -T = RegexFlag.T -TEMPLATE = RegexFlag.TEMPLATE +if sys.version_info < (3, 13): + T = RegexFlag.T + TEMPLATE = RegexFlag.TEMPLATE if sys.version_info >= (3, 11): NOFLAG = RegexFlag.NOFLAG _FlagsType: TypeAlias = int | RegexFlag @@ -287,4 +296,6 @@ def subn( ) -> tuple[bytes, int]: ... def escape(pattern: AnyStr) -> AnyStr: ... def purge() -> None: ... -def template(pattern: AnyStr | Pattern[AnyStr], flags: _FlagsType = 0) -> Pattern[AnyStr]: ... + +if sys.version_info < (3, 13): + def template(pattern: AnyStr | Pattern[AnyStr], flags: _FlagsType = 0) -> Pattern[AnyStr]: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/shutil.pyi b/crates/red_knot/vendor/typeshed/stdlib/shutil.pyi index a06181ce876de..f6c8a390d85f5 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/shutil.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/shutil.pyi @@ -1,6 +1,6 @@ import os import sys -from _typeshed import BytesPath, FileDescriptorOrPath, StrOrBytesPath, StrPath, SupportsRead, SupportsWrite +from _typeshed import BytesPath, ExcInfo, FileDescriptorOrPath, StrOrBytesPath, StrPath, SupportsRead, SupportsWrite from collections.abc import Callable, Iterable, Sequence from tarfile import _TarfileFilter from typing import Any, AnyStr, NamedTuple, Protocol, TypeVar, overload @@ -71,14 +71,12 @@ def copytree( dirs_exist_ok: bool = False, ) -> _PathReturn: ... 
-_OnErrorCallback: TypeAlias = Callable[[Callable[..., Any], str, Any], object] -_OnExcCallback: TypeAlias = Callable[[Callable[..., Any], str, Exception], object] +_OnErrorCallback: TypeAlias = Callable[[Callable[..., Any], str, ExcInfo], object] +_OnExcCallback: TypeAlias = Callable[[Callable[..., Any], str, BaseException], object] class _RmtreeType(Protocol): avoids_symlink_attacks: bool if sys.version_info >= (3, 12): - @overload - def __call__(self, path: StrOrBytesPath, ignore_errors: bool = False, *, dir_fd: int | None = None) -> None: ... @overload @deprecated("The `onerror` parameter is deprecated and will be removed in Python 3.14. Use `onexc` instead.") def __call__( @@ -91,7 +89,12 @@ class _RmtreeType(Protocol): ) -> None: ... @overload def __call__( - self, path: StrOrBytesPath, ignore_errors: bool = False, *, onexc: _OnExcCallback, dir_fd: int | None = None + self, + path: StrOrBytesPath, + ignore_errors: bool = False, + *, + onexc: _OnExcCallback | None = None, + dir_fd: int | None = None, ) -> None: ... elif sys.version_info >= (3, 11): def __call__( @@ -132,14 +135,44 @@ def disk_usage(path: FileDescriptorOrPath) -> _ntuple_diskusage: ... # While chown can be imported on Windows, it doesn't actually work; # see https://bugs.python.org/issue33140. We keep it here because it's # in __all__. -@overload -def chown(path: FileDescriptorOrPath, user: str | int, group: None = None) -> None: ... -@overload -def chown(path: FileDescriptorOrPath, user: None = None, *, group: str | int) -> None: ... -@overload -def chown(path: FileDescriptorOrPath, user: None, group: str | int) -> None: ... -@overload -def chown(path: FileDescriptorOrPath, user: str | int, group: str | int) -> None: ... +if sys.version_info >= (3, 13): + @overload + def chown( + path: FileDescriptorOrPath, + user: str | int, + group: None = None, + *, + dir_fd: int | None = None, + follow_symlinks: bool = True, + ) -> None: ... + @overload + def chown( + path: FileDescriptorOrPath, + user: None = None, + *, + group: str | int, + dir_fd: int | None = None, + follow_symlinks: bool = True, + ) -> None: ... + @overload + def chown( + path: FileDescriptorOrPath, user: None, group: str | int, *, dir_fd: int | None = None, follow_symlinks: bool = True + ) -> None: ... + @overload + def chown( + path: FileDescriptorOrPath, user: str | int, group: str | int, *, dir_fd: int | None = None, follow_symlinks: bool = True + ) -> None: ... + +else: + @overload + def chown(path: FileDescriptorOrPath, user: str | int, group: None = None) -> None: ... + @overload + def chown(path: FileDescriptorOrPath, user: None = None, *, group: str | int) -> None: ... + @overload + def chown(path: FileDescriptorOrPath, user: None, group: str | int) -> None: ... + @overload + def chown(path: FileDescriptorOrPath, user: str | int, group: str | int) -> None: ... + @overload def which(cmd: _StrPathT, mode: int = 1, path: StrPath | None = None) -> str | _StrPathT | None: ... @overload diff --git a/crates/red_knot/vendor/typeshed/stdlib/signal.pyi b/crates/red_knot/vendor/typeshed/stdlib/signal.pyi index cbb7440b9147f..2e3ac5bf24c37 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/signal.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/signal.pyi @@ -181,7 +181,7 @@ else: def strsignal(signalnum: _SIGNUM, /) -> str | None: ... def valid_signals() -> set[Signals]: ... def raise_signal(signalnum: _SIGNUM, /) -> None: ... -def set_wakeup_fd(fd: int, *, warn_on_full_buffer: bool = ...) -> int: ... 
+def set_wakeup_fd(fd: int, /, *, warn_on_full_buffer: bool = ...) -> int: ... if sys.version_info >= (3, 9): if sys.platform == "linux": diff --git a/crates/red_knot/vendor/typeshed/stdlib/sqlite3/dbapi2.pyi b/crates/red_knot/vendor/typeshed/stdlib/sqlite3/dbapi2.pyi index 068ce1514c3c5..3cb4b93e88fe1 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/sqlite3/dbapi2.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/sqlite3/dbapi2.pyi @@ -428,7 +428,11 @@ class Connection: def executemany(self, sql: str, parameters: Iterable[_Parameters], /) -> Cursor: ... def executescript(self, sql_script: str, /) -> Cursor: ... def interrupt(self) -> None: ... - def iterdump(self) -> Generator[str, None, None]: ... + if sys.version_info >= (3, 13): + def iterdump(self, *, filter: str | None = None) -> Generator[str, None, None]: ... + else: + def iterdump(self) -> Generator[str, None, None]: ... + def rollback(self) -> None: ... def set_authorizer( self, authorizer_callback: Callable[[int, str | None, str | None, str | None, str | None], int] | None diff --git a/crates/red_knot/vendor/typeshed/stdlib/stat.pyi b/crates/red_knot/vendor/typeshed/stdlib/stat.pyi index 4518acb5a1621..f3bdd92c1068a 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/stat.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/stat.pyi @@ -1 +1,7 @@ +import sys from _stat import * +from typing import Literal + +if sys.version_info >= (3, 13): + # https://github.com/python/cpython/issues/114081#issuecomment-2119017790 + SF_RESTRICTED: Literal[0x00080000] diff --git a/crates/red_knot/vendor/typeshed/stdlib/statistics.pyi b/crates/red_knot/vendor/typeshed/stdlib/statistics.pyi index c5f5ed64b3289..c8ecbbceab1ab 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/statistics.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/statistics.pyi @@ -1,6 +1,6 @@ import sys from _typeshed import SupportsRichComparisonT -from collections.abc import Hashable, Iterable, Sequence +from collections.abc import Callable, Hashable, Iterable, Sequence from decimal import Decimal from fractions import Fraction from typing import Any, Literal, NamedTuple, SupportsFloat, TypeVar @@ -28,6 +28,8 @@ __all__ = [ if sys.version_info >= (3, 10): __all__ += ["covariance", "correlation", "linear_regression"] +if sys.version_info >= (3, 13): + __all__ += ["kde", "kde_random"] # Most functions in this module accept homogeneous collections of one of these types _Number: TypeAlias = float | Decimal | Fraction @@ -130,3 +132,30 @@ if sys.version_info >= (3, 11): elif sys.version_info >= (3, 10): def linear_regression(regressor: Sequence[_Number], dependent_variable: Sequence[_Number], /) -> LinearRegression: ... + +if sys.version_info >= (3, 13): + _Kernel: TypeAlias = Literal[ + "normal", + "gauss", + "logistic", + "sigmoid", + "rectangular", + "uniform", + "triangular", + "parabolic", + "epanechnikov", + "quartic", + "biweight", + "triweight", + "cosine", + ] + def kde( + data: Sequence[float], h: float, kernel: _Kernel = "normal", *, cumulative: bool = False + ) -> Callable[[float], float]: ... + def kde_random( + data: Sequence[float], + h: float, + kernel: _Kernel = "normal", + *, + seed: int | float | str | bytes | bytearray | None = None, # noqa: Y041 + ) -> Callable[[], float]: ... 
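The statistics stubs above pick up the 3.13 kernel-density API: `kde` returns a callable density estimate built from sample points, and `kde_random` returns a sampler over the same estimate. Minimal sketch, 3.13+ only:

    import sys
    from statistics import kde, kde_random

    if sys.version_info >= (3, 13):
        sample = [1.0, 1.2, 1.9, 4.2, 4.3]
        density = kde(sample, h=0.5)               # h is the smoothing bandwidth
        print(density(1.5))                        # estimated density at x = 1.5
        draw = kde_random(sample, h=0.5, seed=42)
        print(draw())                              # one draw from the estimate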
diff --git a/crates/red_knot/vendor/typeshed/stdlib/subprocess.pyi b/crates/red_knot/vendor/typeshed/stdlib/subprocess.pyi index d3302aba5e102..6234ecc02b483 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/subprocess.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/subprocess.pyi @@ -88,7 +88,7 @@ class CompletedProcess(Generic[_T]): def __init__(self, args: _CMD, returncode: int, stdout: _T | None = None, stderr: _T | None = None) -> None: ... def check_returncode(self) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... if sys.version_info >= (3, 11): # 3.11 adds "process_group" argument @@ -2560,7 +2560,7 @@ class Popen(Generic[AnyStr]): ) -> None: ... def __del__(self) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... # The result really is always a str. if sys.version_info >= (3, 11): diff --git a/crates/red_knot/vendor/typeshed/stdlib/sys/__init__.pyi b/crates/red_knot/vendor/typeshed/stdlib/sys/__init__.pyi index 5867c9a9d5100..9989a27b2bc17 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/sys/__init__.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/sys/__init__.pyi @@ -1,5 +1,5 @@ import sys -from _typeshed import OptExcInfo, ProfileFunction, TraceFunction, structseq +from _typeshed import MaybeNone, OptExcInfo, ProfileFunction, TraceFunction, structseq from _typeshed.importlib import MetaPathFinderProtocol, PathEntryFinderProtocol from builtins import object as _object from collections.abc import AsyncGenerator, Callable, Sequence @@ -56,23 +56,24 @@ ps2: object # TextIO is used instead of more specific types for the standard streams, # since they are often monkeypatched at runtime. At startup, the objects -# are initialized to instances of TextIOWrapper. +# are initialized to instances of TextIOWrapper, but can also be None under +# some circumstances. # # To use methods from TextIOWrapper, use an isinstance check to ensure that # the streams have not been overridden: # # if isinstance(sys.stdout, io.TextIOWrapper): # sys.stdout.reconfigure(...) -stdin: TextIO -stdout: TextIO -stderr: TextIO +stdin: TextIO | MaybeNone +stdout: TextIO | MaybeNone +stderr: TextIO | MaybeNone if sys.version_info >= (3, 10): stdlib_module_names: frozenset[str] -__stdin__: Final[TextIOWrapper] # Contains the original value of stdin -__stdout__: Final[TextIOWrapper] # Contains the original value of stdout -__stderr__: Final[TextIOWrapper] # Contains the original value of stderr +__stdin__: Final[TextIOWrapper | None] # Contains the original value of stdin +__stdout__: Final[TextIOWrapper | None] # Contains the original value of stdout +__stderr__: Final[TextIOWrapper | None] # Contains the original value of stderr tracebacklimit: int version: str api_version: int @@ -264,9 +265,9 @@ def getrecursionlimit() -> int: ... def getsizeof(obj: object, default: int = ...) -> int: ... def getswitchinterval() -> float: ... def getprofile() -> ProfileFunction | None: ... -def setprofile(profilefunc: ProfileFunction | None) -> None: ... +def setprofile(function: ProfileFunction | None, /) -> None: ... def gettrace() -> TraceFunction | None: ... -def settrace(tracefunc: TraceFunction | None) -> None: ... +def settrace(function: TraceFunction | None, /) -> None: ... if sys.platform == "win32": # A tuple of length 5, even though it has more than 5 attributes. 
diff --git a/crates/red_knot/vendor/typeshed/stdlib/syslog.pyi b/crates/red_knot/vendor/typeshed/stdlib/syslog.pyi index 02876e0b7e850..d539dd5e4579f 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/syslog.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/syslog.pyi @@ -35,6 +35,15 @@ if sys.platform != "win32": LOG_USER: Literal[8] LOG_UUCP: Literal[64] LOG_WARNING: Literal[4] + + if sys.version_info >= (3, 13): + LOG_FTP: Literal[88] + LOG_INSTALL: Literal[112] + LOG_LAUNCHD: Literal[192] + LOG_NETINFO: Literal[96] + LOG_RAS: Literal[120] + LOG_REMOTEAUTH: Literal[104] + def LOG_MASK(pri: int, /) -> int: ... def LOG_UPTO(pri: int, /) -> int: ... def closelog() -> None: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/tarfile.pyi b/crates/red_knot/vendor/typeshed/stdlib/tarfile.pyi index b6fe454eff78a..e520994641744 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/tarfile.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/tarfile.pyi @@ -142,22 +142,43 @@ class TarFile: errorlevel: int | None offset: int # undocumented extraction_filter: _FilterFunction | None - def __init__( - self, - name: StrOrBytesPath | None = None, - mode: Literal["r", "a", "w", "x"] = "r", - fileobj: _Fileobj | None = None, - format: int | None = None, - tarinfo: type[TarInfo] | None = None, - dereference: bool | None = None, - ignore_zeros: bool | None = None, - encoding: str | None = None, - errors: str = "surrogateescape", - pax_headers: Mapping[str, str] | None = None, - debug: int | None = None, - errorlevel: int | None = None, - copybufsize: int | None = None, # undocumented - ) -> None: ... + if sys.version_info >= (3, 13): + stream: bool + def __init__( + self, + name: StrOrBytesPath | None = None, + mode: Literal["r", "a", "w", "x"] = "r", + fileobj: _Fileobj | None = None, + format: int | None = None, + tarinfo: type[TarInfo] | None = None, + dereference: bool | None = None, + ignore_zeros: bool | None = None, + encoding: str | None = None, + errors: str = "surrogateescape", + pax_headers: Mapping[str, str] | None = None, + debug: int | None = None, + errorlevel: int | None = None, + copybufsize: int | None = None, # undocumented + stream: bool = False, + ) -> None: ... + else: + def __init__( + self, + name: StrOrBytesPath | None = None, + mode: Literal["r", "a", "w", "x"] = "r", + fileobj: _Fileobj | None = None, + format: int | None = None, + tarinfo: type[TarInfo] | None = None, + dereference: bool | None = None, + ignore_zeros: bool | None = None, + encoding: str | None = None, + errors: str = "surrogateescape", + pax_headers: Mapping[str, str] | None = None, + debug: int | None = None, + errorlevel: int | None = None, + copybufsize: int | None = None, # undocumented + ) -> None: ... + def __enter__(self) -> Self: ... def __exit__( self, type: type[BaseException] | None, value: BaseException | None, traceback: TracebackType | None diff --git a/crates/red_knot/vendor/typeshed/stdlib/tempfile.pyi b/crates/red_knot/vendor/typeshed/stdlib/tempfile.pyi index b663699264048..3ae8cca39f77e 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/tempfile.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/tempfile.pyi @@ -398,7 +398,7 @@ class SpooledTemporaryFile(IO[AnyStr], _SpooledTemporaryFileBase): def writable(self) -> bool: ... def __next__(self) -> AnyStr: ... # type: ignore[override] if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... 
class TemporaryDirectory(Generic[AnyStr]): name: AnyStr @@ -457,7 +457,7 @@ class TemporaryDirectory(Generic[AnyStr]): def __enter__(self) -> AnyStr: ... def __exit__(self, exc: type[BaseException] | None, value: BaseException | None, tb: TracebackType | None) -> None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... # The overloads overlap, but they should still work fine. @overload diff --git a/crates/red_knot/vendor/typeshed/stdlib/threading.pyi b/crates/red_knot/vendor/typeshed/stdlib/threading.pyi index 90b6cabb52377..1ecadef508d00 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/threading.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/threading.pyi @@ -109,6 +109,9 @@ class Lock: def acquire(self, blocking: bool = ..., timeout: float = ...) -> bool: ... def release(self) -> None: ... def locked(self) -> bool: ... + def acquire_lock(self, blocking: bool = ..., timeout: float = ...) -> bool: ... # undocumented + def release_lock(self) -> None: ... # undocumented + def locked_lock(self) -> bool: ... # undocumented @final class _RLock: diff --git a/crates/red_knot/vendor/typeshed/stdlib/time.pyi b/crates/red_knot/vendor/typeshed/stdlib/time.pyi index b7962f0751d60..71cdc4d78fdc4 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/time.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/time.pyi @@ -27,6 +27,9 @@ if sys.platform != "win32": if sys.platform == "darwin": CLOCK_UPTIME_RAW: int + if sys.version_info >= (3, 13): + CLOCK_UPTIME_RAW_APPROX: int + CLOCK_MONOTONIC_RAW_APPROX: int if sys.version_info >= (3, 9) and sys.platform == "linux": CLOCK_TAI: int @@ -94,7 +97,7 @@ if sys.platform != "win32": def clock_settime(clk_id: int, time: float, /) -> None: ... # Unix only if sys.platform != "win32": - def clock_gettime_ns(clock_id: int, /) -> int: ... + def clock_gettime_ns(clk_id: int, /) -> int: ... def clock_settime_ns(clock_id: int, time: int, /) -> int: ... 
if sys.platform == "linux": diff --git a/crates/red_knot/vendor/typeshed/stdlib/token.pyi b/crates/red_knot/vendor/typeshed/stdlib/token.pyi index f1fec7698043c..668987d7c2bfd 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/token.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/token.pyi @@ -3,10 +3,8 @@ import sys __all__ = [ "AMPER", "AMPEREQUAL", - "ASYNC", "AT", "ATEQUAL", - "AWAIT", "CIRCUMFLEX", "CIRCUMFLEXEQUAL", "COLON", @@ -71,6 +69,8 @@ __all__ = [ "NL", "COMMENT", ] +if sys.version_info < (3, 13): + __all__ += ["ASYNC", "AWAIT"] if sys.version_info >= (3, 10): __all__ += ["SOFT_KEYWORD"] @@ -131,8 +131,9 @@ AT: int RARROW: int ELLIPSIS: int ATEQUAL: int -AWAIT: int -ASYNC: int +if sys.version_info < (3, 13): + AWAIT: int + ASYNC: int OP: int ERRORTOKEN: int N_TOKENS: int diff --git a/crates/red_knot/vendor/typeshed/stdlib/tokenize.pyi b/crates/red_knot/vendor/typeshed/stdlib/tokenize.pyi index 3cd9ab8f87ceb..3d2a93865df8e 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/tokenize.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/tokenize.pyi @@ -10,10 +10,8 @@ from typing_extensions import TypeAlias __all__ = [ "AMPER", "AMPEREQUAL", - "ASYNC", "AT", "ATEQUAL", - "AWAIT", "CIRCUMFLEX", "CIRCUMFLEXEQUAL", "COLON", @@ -83,6 +81,8 @@ __all__ = [ "tokenize", "untokenize", ] +if sys.version_info < (3, 13): + __all__ += ["ASYNC", "AWAIT"] if sys.version_info >= (3, 10): __all__ += ["SOFT_KEYWORD"] @@ -90,6 +90,9 @@ if sys.version_info >= (3, 10): if sys.version_info >= (3, 12): __all__ += ["EXCLAMATION", "FSTRING_END", "FSTRING_MIDDLE", "FSTRING_START"] +if sys.version_info >= (3, 13): + __all__ += ["TokenError", "open"] + cookie_re: Pattern[str] blank_re: Pattern[bytes] @@ -110,7 +113,9 @@ class TokenInfo(_TokenInfo): _Token: TypeAlias = TokenInfo | Sequence[int | str | _Position] class TokenError(Exception): ... -class StopTokenizing(Exception): ... # undocumented + +if sys.version_info < (3, 13): + class StopTokenizing(Exception): ... # undocumented class Untokenizer: tokens: list[str] @@ -120,6 +125,8 @@ class Untokenizer: def add_whitespace(self, start: _Position) -> None: ... def untokenize(self, iterable: Iterable[_Token]) -> str: ... def compat(self, token: Sequence[int | str], iterable: Iterable[_Token]) -> None: ... + if sys.version_info >= (3, 12): + def escape_brackets(self, token: str) -> str: ... # the docstring says "returns bytes" but is incorrect -- # if the ENCODING token is missing, it skips the encode diff --git a/crates/red_knot/vendor/typeshed/stdlib/traceback.pyi b/crates/red_knot/vendor/typeshed/stdlib/traceback.pyi index 39803003cfe55..075c0f4b9de8f 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/traceback.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/traceback.pyi @@ -3,7 +3,7 @@ from _typeshed import SupportsWrite, Unused from collections.abc import Generator, Iterable, Iterator, Mapping from types import FrameType, TracebackType from typing import Any, Literal, overload -from typing_extensions import Self, TypeAlias +from typing_extensions import Self, TypeAlias, deprecated __all__ = [ "extract_stack", @@ -85,7 +85,13 @@ def format_list(extracted_list: Iterable[FrameSummary | _FrameSummaryTuple]) -> # undocumented def print_list(extracted_list: Iterable[FrameSummary | _FrameSummaryTuple], file: SupportsWrite[str] | None = None) -> None: ... -if sys.version_info >= (3, 10): +if sys.version_info >= (3, 13): + @overload + def format_exception_only(exc: BaseException | None, /, *, show_group: bool = False) -> list[str]: ... 
+ @overload + def format_exception_only(exc: Unused, /, value: BaseException | None, *, show_group: bool = False) -> list[str]: ... + +elif sys.version_info >= (3, 10): @overload def format_exception_only(exc: BaseException | None, /) -> list[str]: ... @overload @@ -111,13 +117,20 @@ class TracebackException: __context__: TracebackException __suppress_context__: bool stack: StackSummary - exc_type: type[BaseException] filename: str lineno: int text: str offset: int msg: str - if sys.version_info >= (3, 11): + if sys.version_info >= (3, 13): + @property + def exc_type_str(self) -> str: ... + @property + @deprecated("Deprecated in 3.13. Use exc_type_str instead.") + def exc_type(self) -> type[BaseException] | None: ... + else: + exc_type: type[BaseException] + if sys.version_info >= (3, 13): def __init__( self, exc_type: type[BaseException], @@ -130,12 +143,15 @@ class TracebackException: compact: bool = False, max_group_width: int = 15, max_group_depth: int = 10, + save_exc_type: bool = True, _seen: set[int] | None = None, ) -> None: ... - @classmethod - def from_exception( - cls, - exc: BaseException, + elif sys.version_info >= (3, 11): + def __init__( + self, + exc_type: type[BaseException], + exc_value: BaseException, + exc_traceback: TracebackType | None, *, limit: int | None = None, lookup_lines: bool = True, @@ -143,7 +159,8 @@ class TracebackException: compact: bool = False, max_group_width: int = 15, max_group_depth: int = 10, - ) -> Self: ... + _seen: set[int] | None = None, + ) -> None: ... elif sys.version_info >= (3, 10): def __init__( self, @@ -157,6 +174,20 @@ class TracebackException: compact: bool = False, _seen: set[int] | None = None, ) -> None: ... + else: + def __init__( + self, + exc_type: type[BaseException], + exc_value: BaseException, + exc_traceback: TracebackType | None, + *, + limit: int | None = None, + lookup_lines: bool = True, + capture_locals: bool = False, + _seen: set[int] | None = None, + ) -> None: ... + + if sys.version_info >= (3, 11): @classmethod def from_exception( cls, @@ -166,19 +197,21 @@ class TracebackException: lookup_lines: bool = True, capture_locals: bool = False, compact: bool = False, + max_group_width: int = 15, + max_group_depth: int = 10, ) -> Self: ... - else: - def __init__( - self, - exc_type: type[BaseException], - exc_value: BaseException, - exc_traceback: TracebackType | None, + elif sys.version_info >= (3, 10): + @classmethod + def from_exception( + cls, + exc: BaseException, *, limit: int | None = None, lookup_lines: bool = True, capture_locals: bool = False, - _seen: set[int] | None = None, - ) -> None: ... + compact: bool = False, + ) -> Self: ... + else: @classmethod def from_exception( cls, exc: BaseException, *, limit: int | None = None, lookup_lines: bool = True, capture_locals: bool = False @@ -190,7 +223,10 @@ class TracebackException: else: def format(self, *, chain: bool = True) -> Generator[str, None, None]: ... - def format_exception_only(self) -> Generator[str, None, None]: ... + if sys.version_info >= (3, 13): + def format_exception_only(self, *, show_group: bool = False, _depth: int = 0) -> Generator[str, None, None]: ... + else: + def format_exception_only(self) -> Generator[str, None, None]: ... if sys.version_info >= (3, 11): def print(self, *, file: SupportsWrite[str] | None = None, chain: bool = True) -> None: ... 
diff --git a/crates/red_knot/vendor/typeshed/stdlib/types.pyi b/crates/red_knot/vendor/typeshed/stdlib/types.pyi index 38940b4345c82..54465339cefbb 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/types.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/types.pyi @@ -58,6 +58,9 @@ if sys.version_info >= (3, 10): if sys.version_info >= (3, 12): __all__ += ["get_original_bases"] +if sys.version_info >= (3, 13): + __all__ += ["CapsuleType"] + # Note, all classes "defined" here require special handling. _T1 = TypeVar("_T1") @@ -299,7 +302,7 @@ class MappingProxyType(Mapping[_KT, _VT_co]): def values(self) -> ValuesView[_VT_co]: ... def items(self) -> ItemsView[_KT, _VT_co]: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... def __reversed__(self) -> Iterator[_KT]: ... def __or__(self, value: Mapping[_T1, _T2], /) -> dict[_KT | _T1, _VT_co | _T2]: ... def __ror__(self, value: Mapping[_T1, _T2], /) -> dict[_KT | _T1, _VT_co | _T2]: ... @@ -355,6 +358,8 @@ class GeneratorType(Generator[_YieldT_co, _SendT_contra, _ReturnT_co]): ) -> _YieldT_co: ... @overload def throw(self, typ: BaseException, val: None = None, tb: TracebackType | None = ..., /) -> _YieldT_co: ... + if sys.version_info >= (3, 13): + def __class_getitem__(cls, item: Any, /) -> Any: ... @final class AsyncGeneratorType(AsyncGenerator[_YieldT_co, _SendT_contra]): @@ -398,6 +403,8 @@ class CoroutineType(Coroutine[_YieldT_co, _SendT_contra, _ReturnT_co]): ) -> _YieldT_co: ... @overload def throw(self, typ: BaseException, val: None = None, tb: TracebackType | None = ..., /) -> _YieldT_co: ... + if sys.version_info >= (3, 13): + def __class_getitem__(cls, item: Any, /) -> Any: ... @final class MethodType: @@ -607,3 +614,7 @@ if sys.version_info >= (3, 10): def __ror__(self, value: Any, /) -> UnionType: ... def __eq__(self, value: object, /) -> bool: ... def __hash__(self) -> int: ... + +if sys.version_info >= (3, 13): + @final + class CapsuleType: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/typing.pyi b/crates/red_knot/vendor/typeshed/stdlib/typing.pyi index d047f1c87621c..f04b2d85871b0 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/typing.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/typing.pyi @@ -128,6 +128,9 @@ if sys.version_info >= (3, 11): if sys.version_info >= (3, 12): __all__ += ["TypeAliasType", "override"] +if sys.version_info >= (3, 13): + __all__ += ["get_protocol_members", "is_protocol", "NoDefault", "TypeIs", "ReadOnly"] + Any = object() def final(f: _T) -> _T: ... @@ -146,6 +149,21 @@ class TypeVar: if sys.version_info >= (3, 12): @property def __infer_variance__(self) -> bool: ... + if sys.version_info >= (3, 13): + @property + def __default__(self) -> Any: ... + if sys.version_info >= (3, 13): + def __init__( + self, + name: str, + *constraints: Any, + bound: Any | None = None, + contravariant: bool = False, + covariant: bool = False, + infer_variance: bool = False, + default: Any = ..., + ) -> None: ... + elif sys.version_info >= (3, 12): def __init__( self, name: str, @@ -164,6 +182,9 @@ class TypeVar: def __ror__(self, left: Any) -> _SpecialForm: ... if sys.version_info >= (3, 11): def __typing_subst__(self, arg: Any) -> Any: ... + if sys.version_info >= (3, 13): + def __typing_prepare_subst__(self, alias: Any, args: Any) -> tuple[Any, ...]: ... + def has_default(self) -> bool: ... # Used for an undocumented mypy feature. Does not exist at runtime. 
_promote = object() @@ -205,7 +226,15 @@ if sys.version_info >= (3, 11): class TypeVarTuple: @property def __name__(self) -> str: ... - def __init__(self, name: str) -> None: ... + if sys.version_info >= (3, 13): + @property + def __default__(self) -> Any: ... + def has_default(self) -> bool: ... + if sys.version_info >= (3, 13): + def __init__(self, name: str, *, default: Any = ...) -> None: ... + else: + def __init__(self, name: str) -> None: ... + def __iter__(self) -> Any: ... def __typing_subst__(self, arg: Never) -> Never: ... def __typing_prepare_subst__(self, alias: Any, args: Any) -> tuple[Any, ...]: ... @@ -238,6 +267,21 @@ if sys.version_info >= (3, 10): if sys.version_info >= (3, 12): @property def __infer_variance__(self) -> bool: ... + if sys.version_info >= (3, 13): + @property + def __default__(self) -> Any: ... + if sys.version_info >= (3, 13): + def __init__( + self, + name: str, + *, + bound: Any | None = None, + contravariant: bool = False, + covariant: bool = False, + infer_variance: bool = False, + default: Any = ..., + ) -> None: ... + elif sys.version_info >= (3, 12): def __init__( self, name: str, @@ -262,6 +306,8 @@ if sys.version_info >= (3, 10): def __or__(self, right: Any) -> _SpecialForm: ... def __ror__(self, left: Any) -> _SpecialForm: ... + if sys.version_info >= (3, 13): + def has_default(self) -> bool: ... Concatenate: _SpecialForm TypeAlias: _SpecialForm @@ -890,6 +936,8 @@ class NamedTuple(tuple[Any, ...]): def _make(cls, iterable: Iterable[Any]) -> typing_extensions.Self: ... def _asdict(self) -> dict[str, Any]: ... def _replace(self, **kwargs: Any) -> typing_extensions.Self: ... + if sys.version_info >= (3, 13): + def __replace__(self, **kwargs: Any) -> typing_extensions.Self: ... # Internal mypy fallback type for all typed dicts (does not exist at runtime) # N.B. Keep this mostly in sync with typing_extensions._TypedDict/mypy_extensions._TypedDict @@ -942,7 +990,16 @@ class ForwardRef: else: def __init__(self, arg: str, is_argument: bool = True) -> None: ... - if sys.version_info >= (3, 9): + if sys.version_info >= (3, 13): + def _evaluate( + self, + globalns: dict[str, Any] | None, + localns: dict[str, Any] | None, + type_params: tuple[TypeVar | ParamSpec | TypeVarTuple, ...] = ..., + *, + recursive_guard: frozenset[str], + ) -> Any | None: ... + elif sys.version_info >= (3, 9): def _evaluate( self, globalns: dict[str, Any] | None, localns: dict[str, Any] | None, recursive_guard: frozenset[str] ) -> Any | None: ... @@ -985,3 +1042,9 @@ if sys.version_info >= (3, 12): if sys.version_info >= (3, 13): def is_protocol(tp: type, /) -> bool: ... def get_protocol_members(tp: type, /) -> frozenset[str]: ... + @final + class _NoDefaultType: ... 
+ + NoDefault: _NoDefaultType + TypeIs: _SpecialForm + ReadOnly: _SpecialForm diff --git a/crates/red_knot/vendor/typeshed/stdlib/typing_extensions.pyi b/crates/red_knot/vendor/typeshed/stdlib/typing_extensions.pyi index 48a398ba4095f..a7d2b2c2e0835 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/typing_extensions.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/typing_extensions.pyi @@ -3,13 +3,13 @@ import sys import typing from _collections_abc import dict_items, dict_keys, dict_values from _typeshed import IdentityFunction +from contextlib import AbstractAsyncContextManager as AsyncContextManager, AbstractContextManager as ContextManager from typing import ( # noqa: Y022,Y037,Y038,Y039 IO as IO, TYPE_CHECKING as TYPE_CHECKING, AbstractSet as AbstractSet, Any as Any, AnyStr as AnyStr, - AsyncContextManager as AsyncContextManager, AsyncGenerator as AsyncGenerator, AsyncIterable as AsyncIterable, AsyncIterator as AsyncIterator, @@ -20,7 +20,6 @@ from typing import ( # noqa: Y022,Y037,Y038,Y039 ClassVar as ClassVar, Collection as Collection, Container as Container, - ContextManager as ContextManager, Coroutine as Coroutine, Counter as Counter, DefaultDict as DefaultDict, @@ -95,6 +94,7 @@ __all__ = [ "Coroutine", "AsyncGenerator", "AsyncContextManager", + "CapsuleType", "ChainMap", "ContextManager", "Counter", @@ -166,6 +166,7 @@ __all__ = [ "MutableMapping", "MutableSequence", "MutableSet", + "NoDefault", "Optional", "Pattern", "Reversible", @@ -379,86 +380,6 @@ else: def __or__(self, other: Any) -> _SpecialForm: ... def __ror__(self, other: Any) -> _SpecialForm: ... -# New things in 3.xx -# The `default` parameter was added to TypeVar, ParamSpec, and TypeVarTuple (PEP 696) -# The `infer_variance` parameter was added to TypeVar in 3.12 (PEP 695) -# typing_extensions.override (PEP 698) -@final -class TypeVar: - @property - def __name__(self) -> str: ... - @property - def __bound__(self) -> Any | None: ... - @property - def __constraints__(self) -> tuple[Any, ...]: ... - @property - def __covariant__(self) -> bool: ... - @property - def __contravariant__(self) -> bool: ... - @property - def __infer_variance__(self) -> bool: ... - @property - def __default__(self) -> Any | None: ... - def __init__( - self, - name: str, - *constraints: Any, - bound: Any | None = None, - covariant: bool = False, - contravariant: bool = False, - default: Any | None = None, - infer_variance: bool = False, - ) -> None: ... - if sys.version_info >= (3, 10): - def __or__(self, right: Any) -> _SpecialForm: ... - def __ror__(self, left: Any) -> _SpecialForm: ... - if sys.version_info >= (3, 11): - def __typing_subst__(self, arg: Any) -> Any: ... - -@final -class ParamSpec: - @property - def __name__(self) -> str: ... - @property - def __bound__(self) -> Any | None: ... - @property - def __covariant__(self) -> bool: ... - @property - def __contravariant__(self) -> bool: ... - @property - def __infer_variance__(self) -> bool: ... - @property - def __default__(self) -> Any | None: ... - def __init__( - self, - name: str, - *, - bound: None | type[Any] | str = None, - contravariant: bool = False, - covariant: bool = False, - default: type[Any] | str | None = None, - ) -> None: ... - @property - def args(self) -> ParamSpecArgs: ... - @property - def kwargs(self) -> ParamSpecKwargs: ... - -@final -class TypeVarTuple: - @property - def __name__(self) -> str: ... - @property - def __default__(self) -> Any | None: ... - def __init__(self, name: str, *, default: Any | None = None) -> None: ... 
- def __iter__(self) -> Any: ... # Unpack[Self] - -class deprecated: - message: LiteralString - category: type[Warning] | None - stacklevel: int - def __init__(self, message: LiteralString, /, *, category: type[Warning] | None = ..., stacklevel: int = 1) -> None: ... - def __call__(self, arg: _T, /) -> _T: ... - if sys.version_info >= (3, 12): from collections.abc import Buffer as Buffer from types import get_original_bases as get_original_bases @@ -494,16 +415,118 @@ else: def __buffer__(self, flags: int, /) -> memoryview: ... if sys.version_info >= (3, 13): - from typing import get_protocol_members as get_protocol_members, is_protocol as is_protocol + from types import CapsuleType as CapsuleType + from typing import ( + NoDefault as NoDefault, + ParamSpec as ParamSpec, + ReadOnly as ReadOnly, + TypeIs as TypeIs, + TypeVar as TypeVar, + TypeVarTuple as TypeVarTuple, + get_protocol_members as get_protocol_members, + is_protocol as is_protocol, + ) + from warnings import deprecated as deprecated else: def is_protocol(tp: type, /) -> bool: ... def get_protocol_members(tp: type, /) -> frozenset[str]: ... + @final + class _NoDefaultType: ... + + NoDefault: _NoDefaultType + @final + class CapsuleType: ... + + class deprecated: + message: LiteralString + category: type[Warning] | None + stacklevel: int + def __init__(self, message: LiteralString, /, *, category: type[Warning] | None = ..., stacklevel: int = 1) -> None: ... + def __call__(self, arg: _T, /) -> _T: ... + + @final + class TypeVar: + @property + def __name__(self) -> str: ... + @property + def __bound__(self) -> Any | None: ... + @property + def __constraints__(self) -> tuple[Any, ...]: ... + @property + def __covariant__(self) -> bool: ... + @property + def __contravariant__(self) -> bool: ... + @property + def __infer_variance__(self) -> bool: ... + @property + def __default__(self) -> Any: ... + def __init__( + self, + name: str, + *constraints: Any, + bound: Any | None = None, + covariant: bool = False, + contravariant: bool = False, + default: Any = ..., + infer_variance: bool = False, + ) -> None: ... + def has_default(self) -> bool: ... + def __typing_prepare_subst__(self, alias: Any, args: Any) -> tuple[Any, ...]: ... + if sys.version_info >= (3, 10): + def __or__(self, right: Any) -> _SpecialForm: ... + def __ror__(self, left: Any) -> _SpecialForm: ... + if sys.version_info >= (3, 11): + def __typing_subst__(self, arg: Any) -> Any: ... + + @final + class ParamSpec: + @property + def __name__(self) -> str: ... + @property + def __bound__(self) -> Any | None: ... + @property + def __covariant__(self) -> bool: ... + @property + def __contravariant__(self) -> bool: ... + @property + def __infer_variance__(self) -> bool: ... + @property + def __default__(self) -> Any: ... + def __init__( + self, + name: str, + *, + bound: None | type[Any] | str = None, + contravariant: bool = False, + covariant: bool = False, + default: Any = ..., + ) -> None: ... + @property + def args(self) -> ParamSpecArgs: ... + @property + def kwargs(self) -> ParamSpecKwargs: ... + def has_default(self) -> bool: ... + def __typing_prepare_subst__(self, alias: Any, args: Any) -> tuple[Any, ...]: ... + if sys.version_info >= (3, 10): + def __or__(self, right: Any) -> _SpecialForm: ... + def __ror__(self, left: Any) -> _SpecialForm: ... + + @final + class TypeVarTuple: + @property + def __name__(self) -> str: ... + @property + def __default__(self) -> Any: ... + def __init__(self, name: str, *, default: Any = ...) -> None: ... + def __iter__(self) -> Any: ... 
# Unpack[Self] + def has_default(self) -> bool: ... + def __typing_prepare_subst__(self, alias: Any, args: Any) -> tuple[Any, ...]: ... + + ReadOnly: _SpecialForm + TypeIs: _SpecialForm class Doc: documentation: str def __init__(self, documentation: str, /) -> None: ... def __hash__(self) -> int: ... def __eq__(self, other: object) -> bool: ... - -ReadOnly: _SpecialForm -TypeIs: _SpecialForm diff --git a/crates/red_knot/vendor/typeshed/stdlib/unittest/case.pyi b/crates/red_knot/vendor/typeshed/stdlib/unittest/case.pyi index bd1c064f02704..b63292604ecc5 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/unittest/case.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/unittest/case.pyi @@ -329,7 +329,7 @@ class _AssertRaisesContext(_AssertRaisesBaseContext, Generic[_E]): self, exc_type: type[BaseException] | None, exc_value: BaseException | None, tb: TracebackType | None ) -> bool: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class _AssertWarnsContext(_AssertRaisesBaseContext): warning: WarningMessage diff --git a/crates/red_knot/vendor/typeshed/stdlib/urllib/parse.pyi b/crates/red_knot/vendor/typeshed/stdlib/urllib/parse.pyi index ed1929b265019..89a50995d5530 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/urllib/parse.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/urllib/parse.pyi @@ -56,7 +56,7 @@ class _NetlocResultMixinBase(Generic[AnyStr]): @property def port(self) -> int | None: ... if sys.version_info >= (3, 9): - def __class_getitem__(cls, item: Any) -> GenericAlias: ... + def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class _NetlocResultMixinStr(_NetlocResultMixinBase[str], _ResultMixinStr): ... class _NetlocResultMixinBytes(_NetlocResultMixinBase[bytes], _ResultMixinBytes): ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/urllib/request.pyi b/crates/red_knot/vendor/typeshed/stdlib/urllib/request.pyi index 3442be8b8ea46..2a6476f9e6d83 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/urllib/request.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/urllib/request.pyi @@ -52,16 +52,23 @@ _T = TypeVar("_T") _UrlopenRet: TypeAlias = Any _DataType: TypeAlias = ReadableBuffer | SupportsRead[bytes] | Iterable[bytes] | None -def urlopen( - url: str | Request, - data: _DataType | None = None, - timeout: float | None = ..., - *, - cafile: str | None = None, - capath: str | None = None, - cadefault: bool = False, - context: ssl.SSLContext | None = None, -) -> _UrlopenRet: ... +if sys.version_info >= (3, 13): + def urlopen( + url: str | Request, data: _DataType | None = None, timeout: float | None = ..., *, context: ssl.SSLContext | None = None + ) -> _UrlopenRet: ... + +else: + def urlopen( + url: str | Request, + data: _DataType | None = None, + timeout: float | None = ..., + *, + cafile: str | None = None, + capath: str | None = None, + cadefault: bool = False, + context: ssl.SSLContext | None = None, + ) -> _UrlopenRet: ... + def install_opener(opener: OpenerDirector) -> None: ... def build_opener(*handlers: BaseHandler | Callable[[], BaseHandler]) -> OpenerDirector: ... 
diff --git a/crates/red_knot/vendor/typeshed/stdlib/warnings.pyi b/crates/red_knot/vendor/typeshed/stdlib/warnings.pyi index 12afea9337e7d..539a8f2379c10 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/warnings.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/warnings.pyi @@ -3,7 +3,7 @@ from _warnings import warn as warn, warn_explicit as warn_explicit from collections.abc import Sequence from types import ModuleType, TracebackType from typing import Any, Generic, Literal, TextIO, TypeVar, overload -from typing_extensions import TypeAlias +from typing_extensions import LiteralString, TypeAlias __all__ = [ "warn", @@ -16,6 +16,10 @@ __all__ = [ "catch_warnings", ] +if sys.version_info >= (3, 13): + __all__ += ["deprecated"] + +_T = TypeVar("_T") _W = TypeVar("_W", bound=list[WarningMessage] | None) _ActionKind: TypeAlias = Literal["default", "error", "ignore", "always", "module", "once"] @@ -110,3 +114,11 @@ class catch_warnings(Generic[_W]): def __exit__( self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None ) -> None: ... + +if sys.version_info >= (3, 13): + class deprecated: + message: LiteralString + category: type[Warning] | None + stacklevel: int + def __init__(self, message: LiteralString, /, *, category: type[Warning] | None = ..., stacklevel: int = 1) -> None: ... + def __call__(self, arg: _T, /) -> _T: ... diff --git a/crates/red_knot/vendor/typeshed/stdlib/wsgiref/util.pyi b/crates/red_knot/vendor/typeshed/stdlib/wsgiref/util.pyi index 962fac2c5a22a..3966e17b0d28d 100644 --- a/crates/red_knot/vendor/typeshed/stdlib/wsgiref/util.pyi +++ b/crates/red_knot/vendor/typeshed/stdlib/wsgiref/util.pyi @@ -4,6 +4,8 @@ from collections.abc import Callable from typing import IO, Any __all__ = ["FileWrapper", "guess_scheme", "application_uri", "request_uri", "shift_path_info", "setup_testing_defaults"] +if sys.version_info >= (3, 13): + __all__ += ["is_hop_by_hop"] class FileWrapper: filelike: IO[bytes] From fd9d68051e9b539cf583ba0b7b8861f37007c766 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 1 Jun 2024 14:08:02 -0400 Subject: [PATCH 03/25] Update CHANGELOG.md (#11683) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a97219187481..500a5ed511d37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ - \[`flake8-pyi`\] Implement `PYI064` ([#11325](https://github.com/astral-sh/ruff/pull/11325)) - \[`flake8-pyi`\] Implement `PYI066` ([#11541](https://github.com/astral-sh/ruff/pull/11541)) - \[`flake8-pyi`\] Implement `PYI057` ([#11486](https://github.com/astral-sh/ruff/pull/11486)) -- \[`pyflakes`\] Add option to enable F822 in `__init__.py` files ([#11370](https://github.com/astral-sh/ruff/pull/11370)) +- \[`pyflakes`\] Enable `F822` in `__init__.py` files by default ([#11370](https://github.com/astral-sh/ruff/pull/11370)) ### Formatter From b36dd1aa51350356acfa87c850b76b8b5a743a60 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 1 Jun 2024 19:21:11 -0400 Subject: [PATCH 04/25] [`flake8-simplify`] Simplify double negatives in `SIM103` (#11684) ## Summary Closes: https://github.com/astral-sh/ruff/issues/11685. 
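To illustrate, a minimal before/after sketch of the new behavior (adapted from the
fixture cases added below; `a` is made a parameter here so the sketch is self-contained):

```python
# A double negative: the condition is already negated, and the branches
# return the inverted booleans.
def f(a):
    if not 10 < a:
        return False
    return True

# Previously the suggested fix re-wrapped the condition in `not`, producing a
# double negative along the lines of `return not not 10 < a`. With this change,
# SIM103 suggests returning the operand directly:
def f(a):
    return 10 < a
```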
--- .../test/fixtures/flake8_simplify/SIM103.py | 12 +++++ .../flake8_simplify/rules/needless_bool.rs | 17 +++++-- ...ify__tests__preview__SIM103_SIM103.py.snap | 46 +++++++++++++++++++ 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM103.py b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM103.py index e1b868888df86..f42b9dc262a80 100644 --- a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM103.py +++ b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM103.py @@ -111,3 +111,15 @@ def f(): if a: return False return True + + +def f(): + if not 10 < a: + return False + return True + + +def f(): + if 10 < a: + return False + return True diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs index 34a151a5158c8..1eb1943c89d6d 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs @@ -202,11 +202,20 @@ pub(crate) fn needless_bool(checker: &mut Checker, stmt: &Stmt) { } else { // If the return values are inverted, wrap the condition in a `not`. if inverted { - Some(Expr::UnaryOp(ast::ExprUnaryOp { + if let Expr::UnaryOp(ast::ExprUnaryOp { op: ast::UnaryOp::Not, - operand: Box::new(if_test.clone()), - range: TextRange::default(), - })) + operand, + .. + }) = if_test + { + Some((**operand).clone()) + } else { + Some(Expr::UnaryOp(ast::ExprUnaryOp { + op: ast::UnaryOp::Not, + operand: Box::new(if_test.clone()), + range: TextRange::default(), + })) + } } else if if_test.is_compare_expr() { // If the condition is a comparison, we can replace it with the condition, since we // know it's a boolean. 
diff --git a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM103_SIM103.py.snap b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM103_SIM103.py.snap index a71b6622c25cf..1b44533abc17b 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM103_SIM103.py.snap +++ b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM103_SIM103.py.snap @@ -213,3 +213,49 @@ SIM103.py:111:5: SIM103 [*] Return the condition `not a` directly 112 |- return False 113 |- return True 111 |+ return not a +114 112 | +115 113 | +116 114 | def f(): + +SIM103.py:117:5: SIM103 [*] Return the condition `10 < a` directly + | +116 | def f(): +117 | if not 10 < a: + | _____^ +118 | | return False +119 | | return True + | |_______________^ SIM103 + | + = help: Replace with `return 10 < a` + +ℹ Unsafe fix +114 114 | +115 115 | +116 116 | def f(): +117 |- if not 10 < a: +118 |- return False +119 |- return True + 117 |+ return 10 < a +120 118 | +121 119 | +122 120 | def f(): + +SIM103.py:123:5: SIM103 [*] Return the condition `not 10 < a` directly + | +122 | def f(): +123 | if 10 < a: + | _____^ +124 | | return False +125 | | return True + | |_______________^ SIM103 + | + = help: Replace with `return not 10 < a` + +ℹ Unsafe fix +120 120 | +121 121 | +122 122 | def f(): +123 |- if 10 < a: +124 |- return False +125 |- return True + 123 |+ return not 10 < a From 9f3e609278f1a2b59e565c2af814bbec5eec8229 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sun, 2 Jun 2024 14:06:04 +0100 Subject: [PATCH 05/25] Make tests aware that py313 is the latest supported Python version (#11690) --- .../test/fixtures/pyupgrade/UP036_0.py | 14 ++++++------- ...__rules__pyupgrade__tests__UP036_0.py.snap | 20 +++++++++---------- crates/ruff_linter/src/settings/types.rs | 4 +++- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP036_0.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP036_0.py index 47ac88f76148c..50d499b872bb4 100644 --- a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP036_0.py +++ b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP036_0.py @@ -179,13 +179,13 @@ def g(): if sys.version_info > (3, 0): \ expected_error = [] -if sys.version_info < (3,12): +if sys.version_info < (3,13): print("py3") -if sys.version_info <= (3,12): +if sys.version_info <= (3,13): print("py3") -if sys.version_info <= (3,12): +if sys.version_info <= (3,13): print("py3") if sys.version_info == 10000000: @@ -197,10 +197,10 @@ def g(): if sys.version_info <= (3,10000000): print("py3") -if sys.version_info > (3,12): +if sys.version_info > (3,13): print("py3") -if sys.version_info >= (3,12): +if sys.version_info >= (3,13): print("py3") # Slices on `sys.version_info` should be treated equivalently. 
@@ -210,10 +210,10 @@ def g(): if sys.version_info[:3] >= (3,0): print("py3") -if sys.version_info[:2] > (3,13): +if sys.version_info[:2] > (3,14): print("py3") -if sys.version_info[:3] > (3,13): +if sys.version_info[:3] > (3,14): print("py3") if sys.version_info > (3,0): diff --git a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP036_0.py.snap b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP036_0.py.snap index 4e538c1f4e19d..329642fd56850 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP036_0.py.snap +++ b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP036_0.py.snap @@ -663,13 +663,13 @@ UP036_0.py:179:8: UP036 [*] Version block is outdated for minimum Python version 179 |- if sys.version_info > (3, 0): \ 180 179 | expected_error = [] 181 180 | -182 181 | if sys.version_info < (3,12): +182 181 | if sys.version_info < (3,13): UP036_0.py:182:4: UP036 [*] Version block is outdated for minimum Python version | 180 | expected_error = [] 181 | -182 | if sys.version_info < (3,12): +182 | if sys.version_info < (3,13): | ^^^^^^^^^^^^^^^^^^^^^^^^^ UP036 183 | print("py3") | @@ -679,10 +679,10 @@ UP036_0.py:182:4: UP036 [*] Version block is outdated for minimum Python version 179 179 | if sys.version_info > (3, 0): \ 180 180 | expected_error = [] 181 181 | -182 |-if sys.version_info < (3,12): +182 |-if sys.version_info < (3,13): 183 |- print("py3") 184 182 | -185 183 | if sys.version_info <= (3,12): +185 183 | if sys.version_info <= (3,13): 186 184 | print("py3") UP036_0.py:191:24: UP036 Version specifier is invalid @@ -716,17 +716,17 @@ UP036_0.py:203:4: UP036 [*] Version block is outdated for minimum Python version | 201 | print("py3") 202 | -203 | if sys.version_info >= (3,12): +203 | if sys.version_info >= (3,13): | ^^^^^^^^^^^^^^^^^^^^^^^^^^ UP036 204 | print("py3") | = help: Remove outdated version block ℹ Unsafe fix -200 200 | if sys.version_info > (3,12): +200 200 | if sys.version_info > (3,13): 201 201 | print("py3") 202 202 | -203 |-if sys.version_info >= (3,12): +203 |-if sys.version_info >= (3,13): 204 |- print("py3") 203 |+print("py3") 205 204 | @@ -771,7 +771,7 @@ UP036_0.py:210:4: UP036 [*] Version block is outdated for minimum Python version 211 |- print("py3") 210 |+print("py3") 212 211 | -213 212 | if sys.version_info[:2] > (3,13): +213 212 | if sys.version_info[:2] > (3,14): 214 213 | print("py3") UP036_0.py:219:4: UP036 [*] Version block is outdated for minimum Python version @@ -786,7 +786,7 @@ UP036_0.py:219:4: UP036 [*] Version block is outdated for minimum Python version = help: Remove outdated version block ℹ Unsafe fix -216 216 | if sys.version_info[:3] > (3,13): +216 216 | if sys.version_info[:3] > (3,14): 217 217 | print("py3") 218 218 | 219 |-if sys.version_info > (3,0): @@ -801,5 +801,3 @@ UP036_0.py:219:4: UP036 [*] Version block is outdated for minimum Python version 226 |- "this is\ 225 |+"this is\ 227 226 | allowed too" - - diff --git a/crates/ruff_linter/src/settings/types.rs b/crates/ruff_linter/src/settings/types.rs index ce9f9abd057f8..877d705bb935e 100644 --- a/crates/ruff_linter/src/settings/types.rs +++ b/crates/ruff_linter/src/settings/types.rs @@ -50,6 +50,8 @@ pub enum PythonVersion { Py311, Py312, Py313, + // Remember to update the `latest()` function + // when adding new versions here! 
} impl From for Pep440Version { @@ -62,7 +64,7 @@ impl From for Pep440Version { impl PythonVersion { /// Return the latest supported Python version. pub const fn latest() -> Self { - Self::Py312 + Self::Py313 } pub const fn as_tuple(&self) -> (u8, u8) { From 6d79ddc0aa90ca540551bf2271cc556663bd70d1 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sun, 2 Jun 2024 13:51:03 -0400 Subject: [PATCH 06/25] [`pyupgrade`] Write empty string in lieu of panic (#11696) ## Summary Closes https://github.com/astral-sh/ruff/issues/11692. --- .../test/fixtures/pyupgrade/UP032_0.py | 6 +++ .../src/rules/pyupgrade/rules/f_strings.rs | 18 ++++++--- ...__rules__pyupgrade__tests__UP032_0.py.snap | 40 +++++++++++++++++++ 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP032_0.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP032_0.py index 09ea36db61943..497caa8d3c663 100644 --- a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP032_0.py +++ b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP032_0.py @@ -259,3 +259,9 @@ async def c(): # The string _should_ be converted, since the function call is repeated in the arguments. "{0} {1}".format(foo(), foo()) + +# The call should be removed, but the string itself should remain. +''.format(self.project) + +# The call should be removed, but the string itself should remain. +"".format(self.project) diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs index 31a2383274349..d6441f9904490 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs @@ -400,7 +400,7 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F return; }; - if !value.is_string_literal_expr() { + let Expr::StringLiteral(literal) = &**value else { return; }; @@ -520,10 +520,18 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F .intersects(call.arguments.range()); if !has_comments { - diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( - contents, - call.range(), - ))); + if contents.is_empty() { + // Ex) `''.format(self.project)` + diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( + checker.locator().slice(literal).to_string(), + call.range(), + ))); + } else { + diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( + contents, + call.range(), + ))); + } }; checker.diagnostics.push(diagnostic); } diff --git a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP032_0.py.snap b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP032_0.py.snap index e5eb894054014..63a25c3f5922a 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP032_0.py.snap +++ b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP032_0.py.snap @@ -1321,6 +1321,8 @@ UP032_0.py:261:1: UP032 [*] Use f-string instead of `format` call 260 | # The string _should_ be converted, since the function call is repeated in the arguments. 261 | "{0} {1}".format(foo(), foo()) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP032 +262 | +263 | # The call should be removed, but the string itself should remain. 
| = help: Convert to f-string
@@ -1330,3 +1332,41 @@ UP032_0.py:261:1: UP032 [*] Use f-string instead of `format` call
 260 260 | # The string _should_ be converted, since the function call is repeated in the arguments.
 261 |-"{0} {1}".format(foo(), foo())
 261 |+f"{foo()} {foo()}"
+262 262 |
+263 263 | # The call should be removed, but the string itself should remain.
+264 264 | ''.format(self.project)
+
+UP032_0.py:264:1: UP032 [*] Use f-string instead of `format` call
+    |
+263 | # The call should be removed, but the string itself should remain.
+264 | ''.format(self.project)
+    | ^^^^^^^^^^^^^^^^^^^^^^^ UP032
+265 |
+266 | # The call should be removed, but the string itself should remain.
+    |
+    = help: Convert to f-string
+
+ℹ Safe fix
+261 261 | "{0} {1}".format(foo(), foo())
+262 262 |
+263 263 | # The call should be removed, but the string itself should remain.
+264     |-''.format(self.project)
+    264 |+''
+265 265 |
+266 266 | # The call should be removed, but the string itself should remain.
+267 267 | "".format(self.project)
+
+UP032_0.py:267:1: UP032 [*] Use f-string instead of `format` call
+    |
+266 | # The call should be removed, but the string itself should remain.
+267 | "".format(self.project)
+    | ^^^^^^^^^^^^^^^^^^^^^^^ UP032
+    |
+    = help: Convert to f-string
+
+ℹ Safe fix
+264 264 | ''.format(self.project)
+265 265 |
+266 266 | # The call should be removed, but the string itself should remain.
+267     |-"".format(self.project)
+    267 |+""

From 0ea2519e809901a6dd0e8d2c12136f8168bc6bcc Mon Sep 17 00:00:00 2001
From: Tobias Fischer <30701667+tobb10001@users.noreply.github.com>
Date: Sun, 2 Jun 2024 19:59:57 +0200
Subject: [PATCH 07/25] Add RDJson support. (#11682)

## Summary

Implement support for RDJson output for `ruff check`, as requested in
#8655.

## Test Plan

Tested using a snapshot test, following the same approach as, e.g., the
JSON output formatter.

## Additional info

I tried to keep the implementation close to the JSON implementation. I
had to deviate a bit to make the `suggestions` key work: if there are
no suggestions, setting `suggestions` to `null` is invalid according to
the JSON Schema. I therefore opted for a slightly more complex
implementation that skips the `suggestions` key entirely when no fixes
are available for a given diagnostic. It may have been simpler to emit
`"suggestions": []`, but I ended up doing it this way.

I didn't consider notebooks, as I _think_ RDJson doesn't work with
notebooks. This should be confirmed, and if so, a warning or error
should be emitted when attempting to output diagnostics for a notebook.

I also didn't consider `ruff format`, as this comment suggests it
wouldn't be compatible:
https://github.com/astral-sh/ruff/issues/8655#issuecomment-1811446160

I'm new to Rust, so any feedback is appreciated. :slightly_smiling_face:
I implemented this to have a productive rainy Saturday afternoon; I'm
not knowledgeable about RDJson beyond the sources linked in the issue.
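To make the design concrete, here is an abridged sample of the emitter's output,
adapted from the snapshot test added below (keys reordered for readability). Note
that the second diagnostic has no available fix, so its `suggestions` key is
omitted entirely:

```json
{
  "source": {"name": "ruff", "url": "https://docs.astral.sh/ruff"},
  "severity": "warning",
  "diagnostics": [
    {
      "message": "`os` imported but unused",
      "location": {
        "path": "fib.py",
        "range": {"start": {"line": 1, "column": 8}, "end": {"line": 1, "column": 10}}
      },
      "code": {"value": "F401", "url": "https://docs.astral.sh/ruff/rules/unused-import"},
      "suggestions": [
        {"range": {"start": {"line": 1, "column": 1}, "end": {"line": 2, "column": 1}}, "text": ""}
      ]
    },
    {
      "message": "Undefined name `a`",
      "location": {
        "path": "undef.py",
        "range": {"start": {"line": 1, "column": 4}, "end": {"line": 1, "column": 5}}
      },
      "code": {"value": "F821", "url": "https://docs.astral.sh/ruff/rules/undefined-name"}
    }
  ]
}
```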
--- crates/ruff/src/printer.rs | 6 +- crates/ruff_linter/src/message/mod.rs | 2 + crates/ruff_linter/src/message/rdjson.rs | 138 ++++++++++++++++++ ...inter__message__rdjson__tests__output.snap | 103 +++++++++++++ crates/ruff_linter/src/settings/types.rs | 2 + docs/configuration.md | 2 +- ruff.schema.json | 1 + 7 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 crates/ruff_linter/src/message/rdjson.rs create mode 100644 crates/ruff_linter/src/message/snapshots/ruff_linter__message__rdjson__tests__output.snap diff --git a/crates/ruff/src/printer.rs b/crates/ruff/src/printer.rs index c44150602bdce..3931a5a13df3a 100644 --- a/crates/ruff/src/printer.rs +++ b/crates/ruff/src/printer.rs @@ -13,7 +13,8 @@ use ruff_linter::fs::relativize_path; use ruff_linter::logging::LogLevel; use ruff_linter::message::{ AzureEmitter, Emitter, EmitterContext, GithubEmitter, GitlabEmitter, GroupedEmitter, - JsonEmitter, JsonLinesEmitter, JunitEmitter, PylintEmitter, SarifEmitter, TextEmitter, + JsonEmitter, JsonLinesEmitter, JunitEmitter, PylintEmitter, RdjsonEmitter, SarifEmitter, + TextEmitter, }; use ruff_linter::notify_user; use ruff_linter::registry::{AsRule, Rule}; @@ -242,6 +243,9 @@ impl Printer { SerializationFormat::Json => { JsonEmitter.emit(writer, &diagnostics.messages, &context)?; } + SerializationFormat::Rdjson => { + RdjsonEmitter.emit(writer, &diagnostics.messages, &context)?; + } SerializationFormat::JsonLines => { JsonLinesEmitter.emit(writer, &diagnostics.messages, &context)?; } diff --git a/crates/ruff_linter/src/message/mod.rs b/crates/ruff_linter/src/message/mod.rs index 2f44de44eda71..7e95fc9d14cba 100644 --- a/crates/ruff_linter/src/message/mod.rs +++ b/crates/ruff_linter/src/message/mod.rs @@ -13,6 +13,7 @@ pub use json::JsonEmitter; pub use json_lines::JsonLinesEmitter; pub use junit::JunitEmitter; pub use pylint::PylintEmitter; +pub use rdjson::RdjsonEmitter; use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix}; use ruff_notebook::NotebookIndex; use ruff_source_file::{SourceFile, SourceLocation}; @@ -29,6 +30,7 @@ mod json; mod json_lines; mod junit; mod pylint; +mod rdjson; mod sarif; mod text; diff --git a/crates/ruff_linter/src/message/rdjson.rs b/crates/ruff_linter/src/message/rdjson.rs new file mode 100644 index 0000000000000..9d3ff50411f2a --- /dev/null +++ b/crates/ruff_linter/src/message/rdjson.rs @@ -0,0 +1,138 @@ +use std::io::Write; + +use serde::ser::SerializeSeq; +use serde::{Serialize, Serializer}; +use serde_json::{json, Value}; + +use ruff_diagnostics::Edit; +use ruff_source_file::SourceCode; +use ruff_text_size::Ranged; + +use crate::message::{Emitter, EmitterContext, Message, SourceLocation}; +use crate::registry::AsRule; + +#[derive(Default)] +pub struct RdjsonEmitter; + +impl Emitter for RdjsonEmitter { + fn emit( + &mut self, + writer: &mut dyn Write, + messages: &[Message], + _context: &EmitterContext, + ) -> anyhow::Result<()> { + serde_json::to_writer_pretty( + writer, + &json!({ + "source": { + "name": "ruff", + "url": "https://docs.astral.sh/ruff", + }, + "severity": "warning", + "diagnostics": &ExpandedMessages{ messages } + }), + )?; + + Ok(()) + } +} + +struct ExpandedMessages<'a> { + messages: &'a [Message], +} + +impl Serialize for ExpandedMessages<'_> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut s = serializer.serialize_seq(Some(self.messages.len()))?; + + for message in self.messages { + let value = message_to_rdjson_value(message); + s.serialize_element(&value)?; + } + + s.end() + } +} + 
+fn message_to_rdjson_value(message: &Message) -> Value { + let source_code = message.file.to_source_code(); + + let start_location = source_code.source_location(message.start()); + let end_location = source_code.source_location(message.end()); + + if let Some(fix) = message.fix.as_ref() { + json!({ + "message": message.kind.body, + "location": { + "path": message.filename(), + "range": rdjson_range(&start_location, &end_location), + }, + "code": { + "value": message.kind.rule().noqa_code().to_string(), + "url": message.kind.rule().url(), + }, + "suggestions": rdjson_suggestions(fix.edits(), &source_code), + }) + } else { + json!({ + "message": message.kind.body, + "location": { + "path": message.filename(), + "range": rdjson_range(&start_location, &end_location), + }, + "code": { + "value": message.kind.rule().noqa_code().to_string(), + "url": message.kind.rule().url(), + }, + }) + } +} + +fn rdjson_suggestions(edits: &[Edit], source_code: &SourceCode) -> Value { + Value::Array( + edits + .iter() + .map(|edit| { + let location = source_code.source_location(edit.start()); + let end_location = source_code.source_location(edit.end()); + + json!({ + "range": rdjson_range(&location, &end_location), + "text": edit.content().unwrap_or_default(), + }) + }) + .collect(), + ) +} + +fn rdjson_range(start: &SourceLocation, end: &SourceLocation) -> Value { + json!({ + "start": { + "line": start.row, + "column": start.column, + }, + "end": { + "line": end.row, + "column": end.column, + }, + }) +} + +#[cfg(test)] +mod tests { + use insta::assert_snapshot; + + use crate::message::tests::{capture_emitter_output, create_messages}; + use crate::message::RdjsonEmitter; + + #[test] + fn output() { + let mut emitter = RdjsonEmitter; + let content = capture_emitter_output(&mut emitter, &create_messages()); + + assert_snapshot!(content); + } +} diff --git a/crates/ruff_linter/src/message/snapshots/ruff_linter__message__rdjson__tests__output.snap b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__rdjson__tests__output.snap new file mode 100644 index 0000000000000..cbb8d6c632796 --- /dev/null +++ b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__rdjson__tests__output.snap @@ -0,0 +1,103 @@ +--- +source: crates/ruff_linter/src/message/rdjson.rs +expression: content +--- +{ + "diagnostics": [ + { + "code": { + "url": "https://docs.astral.sh/ruff/rules/unused-import", + "value": "F401" + }, + "location": { + "path": "fib.py", + "range": { + "end": { + "column": 10, + "line": 1 + }, + "start": { + "column": 8, + "line": 1 + } + } + }, + "message": "`os` imported but unused", + "suggestions": [ + { + "range": { + "end": { + "column": 1, + "line": 2 + }, + "start": { + "column": 1, + "line": 1 + } + }, + "text": "" + } + ] + }, + { + "code": { + "url": "https://docs.astral.sh/ruff/rules/unused-variable", + "value": "F841" + }, + "location": { + "path": "fib.py", + "range": { + "end": { + "column": 6, + "line": 6 + }, + "start": { + "column": 5, + "line": 6 + } + } + }, + "message": "Local variable `x` is assigned to but never used", + "suggestions": [ + { + "range": { + "end": { + "column": 10, + "line": 6 + }, + "start": { + "column": 5, + "line": 6 + } + }, + "text": "" + } + ] + }, + { + "code": { + "url": "https://docs.astral.sh/ruff/rules/undefined-name", + "value": "F821" + }, + "location": { + "path": "undef.py", + "range": { + "end": { + "column": 5, + "line": 1 + }, + "start": { + "column": 4, + "line": 1 + } + } + }, + "message": "Undefined name `a`" + } + ], + "severity": 
"warning", + "source": { + "name": "ruff", + "url": "https://docs.astral.sh/ruff" + } +} diff --git a/crates/ruff_linter/src/settings/types.rs b/crates/ruff_linter/src/settings/types.rs index 877d705bb935e..25f7223b76aea 100644 --- a/crates/ruff_linter/src/settings/types.rs +++ b/crates/ruff_linter/src/settings/types.rs @@ -515,6 +515,7 @@ pub enum SerializationFormat { Github, Gitlab, Pylint, + Rdjson, Azure, Sarif, } @@ -532,6 +533,7 @@ impl Display for SerializationFormat { Self::Github => write!(f, "github"), Self::Gitlab => write!(f, "gitlab"), Self::Pylint => write!(f, "pylint"), + Self::Rdjson => write!(f, "rdjson"), Self::Azure => write!(f, "azure"), Self::Sarif => write!(f, "sarif"), } diff --git a/docs/configuration.md b/docs/configuration.md index 8f38279e25d27..d675a36da1261 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -599,7 +599,7 @@ Options: format is "concise". In preview mode, the default serialization format is "full" [env: RUFF_OUTPUT_FORMAT=] [possible values: text, concise, full, json, json-lines, junit, grouped, github, gitlab, - pylint, azure, sarif] + pylint, rdjson, azure, sarif] -o, --output-file Specify file to write the linter output to (default: stdout) [env: RUFF_OUTPUT_FILE=] diff --git a/ruff.schema.json b/ruff.schema.json index b146d9b74bcc2..936eeb575d63b 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3947,6 +3947,7 @@ "github", "gitlab", "pylint", + "rdjson", "azure", "sarif" ] From 94a3c53841d0b29587052b7814715be40b15e72b Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sun, 2 Jun 2024 22:59:48 +0100 Subject: [PATCH 08/25] Update UP035 for Python 3.13 and the latest version of typing_extensions (#11693) --- .../test/fixtures/pyupgrade/UP035.py | 15 ++- .../pyupgrade/rules/deprecated_import.rs | 75 +++++++++++-- ...er__rules__pyupgrade__tests__UP035.py.snap | 102 +++++++++++++++++- 3 files changed, 176 insertions(+), 16 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP035.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP035.py index bd30ac03e302a..5b2f11798e41a 100644 --- a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP035.py +++ b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP035.py @@ -80,13 +80,13 @@ # OK from a import b -# OK: `typing_extensions` contains backported improvements. +# UP035 on py312+ only from typing_extensions import SupportsIndex -# OK: `typing_extensions` contains backported improvements. +# UP035 on py312+ only from typing_extensions import NamedTuple -# OK: `typing_extensions` supports `frozen_default` (backported from 3.12). +# UP035 on py312+ only: `typing_extensions` supports `frozen_default` (backported from 3.12). 
from typing_extensions import dataclass_transform # UP035 @@ -100,3 +100,12 @@ # UP035 from typing_extensions import get_original_bases + +# UP035 on py313+ only +from typing_extensions import TypeVar + +# UP035 on py313+ only +from typing_extensions import CapsuleType + +# UP035 on py313+ only +from typing_extensions import deprecated diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs index b8a24e41f526f..7777e13a4b957 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs @@ -154,12 +154,10 @@ const TYPING_EXTENSIONS_TO_TYPING: &[&str] = &[ "ClassVar", "Collection", "Container", - "ContextManager", "Coroutine", "DefaultDict", "Dict", "FrozenSet", - "Generator", "Generic", "Hashable", "IO", @@ -193,6 +191,8 @@ const TYPING_EXTENSIONS_TO_TYPING: &[&str] = &[ // Introduced in Python 3.5.2, but `typing_extensions` contains backported bugfixes and // optimizations, // "NewType", + // "Generator", + // "ContextManager", ]; // Python 3.7+ @@ -202,13 +202,16 @@ const MYPY_EXTENSIONS_TO_TYPING_37: &[&str] = &["NoReturn"]; // Members of `typing_extensions` that were moved to `typing`. const TYPING_EXTENSIONS_TO_TYPING_37: &[&str] = &[ - "AsyncContextManager", - "AsyncGenerator", "ChainMap", "Counter", "Deque", "ForwardRef", "NoReturn", + // Introduced in Python <=3.7, but `typing_extensions` backports some features + // from Python 3.12/3.13 + // "AsyncContextManager", + // "AsyncGenerator", + // "NamedTuple", ]; // Python 3.8+ @@ -220,12 +223,13 @@ const MYPY_EXTENSIONS_TO_TYPING_38: &[&str] = &["TypedDict"]; const TYPING_EXTENSIONS_TO_TYPING_38: &[&str] = &[ "Final", "OrderedDict", - "runtime_checkable", // Introduced in Python 3.8, but `typing_extensions` contains backported bugfixes and // optimizations. // "Literal", // "Protocol", // "SupportsIndex", + // "runtime_checkable", + // "TypedDict", ]; // Python 3.9+ @@ -332,12 +336,6 @@ const BACKPORTS_STR_ENUM_TO_ENUM_311: &[&str] = &["StrEnum"]; // Members of `typing_extensions` that were moved to `typing`. const TYPING_EXTENSIONS_TO_TYPING_312: &[&str] = &[ - // Introduced in Python 3.12, but `typing_extensions` backports some bug fixes. - // "NamedTuple", - - // Introduced in Python 3.12, but `typing_extensions` backports support for PEP 705. - // "TypedDict", - // Introduced in Python 3.8, but `typing_extensions` backports a ton of optimizations that were // added in Python 3.12. "Protocol", @@ -345,10 +343,13 @@ const TYPING_EXTENSIONS_TO_TYPING_312: &[&str] = &[ "SupportsBytes", "SupportsComplex", "SupportsFloat", + "SupportsIndex", "SupportsInt", "SupportsRound", "TypeAliasType", "Unpack", + // Introduced in Python 3.6, but `typing_extensions` backports bugfixes and features + "NamedTuple", // Introduced in Python 3.11, but `typing_extensions` backports the `frozen_default` argument, // which was introduced in Python 3.12. "dataclass_transform", @@ -361,6 +362,41 @@ const TYPING_EXTENSIONS_TO_COLLECTIONS_ABC_312: &[&str] = &["Buffer"]; // Members of `typing_extensions` that were moved to `types`. const TYPING_EXTENSIONS_TO_TYPES_312: &[&str] = &["get_original_bases"]; +// Python 3.13+ + +// Members of `typing_extensions` that were moved to `typing`. 
+const TYPING_EXTENSIONS_TO_TYPING_313: &[&str] = &[ + "get_protocol_members", + "is_protocol", + "NoDefault", + "ReadOnly", + "TypeIs", + // Introduced in Python 3.6, + // but typing_extensions backports features from py313: + "ContextManager", + "Generator", + // Introduced in Python 3.7, + // but typing_extensions backports features from py313: + "AsyncContextManager", + "AsyncGenerator", + // Introduced in Python 3.8, but typing_extensions + // backports features and bugfixes from py313: + "Protocol", + "TypedDict", + "runtime_checkable", + // Introduced in earlier Python versions, + // but typing_extensions backports PEP-696: + "ParamSpec", + "TypeVar", + "TypeVarTuple", +]; + +// Members of `typing_extensions` that were moved to `types`. +const TYPING_EXTENSIONS_TO_TYPES_313: &[&str] = &["CapsuleType"]; + +// Members of `typing_extensions` that were moved to `warnings`. +const TYPING_EXTENSIONS_TO_WARNINGS_313: &[&str] = &["deprecated"]; + struct ImportReplacer<'a> { stmt: &'a Stmt, module: &'a str, @@ -441,11 +477,25 @@ impl<'a> ImportReplacer<'a> { operations.push(operation); } + // `typing_extensions` to `warnings` + let mut typing_extensions_to_warnings = vec![]; + if self.version >= PythonVersion::Py313 { + typing_extensions_to_warnings.extend(TYPING_EXTENSIONS_TO_WARNINGS_313); + } + if let Some(operation) = + self.try_replace(&typing_extensions_to_warnings, "warnings") + { + operations.push(operation); + } + // `typing_extensions` to `types` let mut typing_extensions_to_types = vec![]; if self.version >= PythonVersion::Py312 { typing_extensions_to_types.extend(TYPING_EXTENSIONS_TO_TYPES_312); } + if self.version >= PythonVersion::Py313 { + typing_extensions_to_types.extend(TYPING_EXTENSIONS_TO_TYPES_313); + } if let Some(operation) = self.try_replace(&typing_extensions_to_types, "types") { operations.push(operation); } @@ -470,6 +520,9 @@ impl<'a> ImportReplacer<'a> { if self.version >= PythonVersion::Py312 { typing_extensions_to_typing.extend(TYPING_EXTENSIONS_TO_TYPING_312); } + if self.version >= PythonVersion::Py313 { + typing_extensions_to_typing.extend(TYPING_EXTENSIONS_TO_TYPING_313); + } if let Some(operation) = self.try_replace(&typing_extensions_to_typing, "typing") { operations.push(operation); } diff --git a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP035.py.snap b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP035.py.snap index ddc6941a2c41d..e83b5d88e3ec8 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP035.py.snap +++ b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP035.py.snap @@ -995,9 +995,49 @@ UP035.py:77:1: UP035 [*] Import from `collections.abc` instead: `Callable` 79 79 | 80 80 | # OK +UP035.py:84:1: UP035 [*] Import from `typing` instead: `SupportsIndex` + | +83 | # UP035 on py312+ only +84 | from typing_extensions import SupportsIndex + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP035 +85 | +86 | # UP035 on py312+ only + | + = help: Import from `typing` + +ℹ Safe fix +81 81 | from a import b +82 82 | +83 83 | # UP035 on py312+ only +84 |-from typing_extensions import SupportsIndex + 84 |+from typing import SupportsIndex +85 85 | +86 86 | # UP035 on py312+ only +87 87 | from typing_extensions import NamedTuple + +UP035.py:87:1: UP035 [*] Import from `typing` instead: `NamedTuple` + | +86 | # UP035 on py312+ only +87 | from typing_extensions import NamedTuple + | 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP035 +88 | +89 | # UP035 on py312+ only: `typing_extensions` supports `frozen_default` (backported from 3.12). + | + = help: Import from `typing` + +ℹ Safe fix +84 84 | from typing_extensions import SupportsIndex +85 85 | +86 86 | # UP035 on py312+ only +87 |-from typing_extensions import NamedTuple + 87 |+from typing import NamedTuple +88 88 | +89 89 | # UP035 on py312+ only: `typing_extensions` supports `frozen_default` (backported from 3.12). +90 90 | from typing_extensions import dataclass_transform + UP035.py:90:1: UP035 [*] Import from `typing` instead: `dataclass_transform` | -89 | # OK: `typing_extensions` supports `frozen_default` (backported from 3.12). +89 | # UP035 on py312+ only: `typing_extensions` supports `frozen_default` (backported from 3.12). 90 | from typing_extensions import dataclass_transform | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP035 91 | @@ -1008,7 +1048,7 @@ UP035.py:90:1: UP035 [*] Import from `typing` instead: `dataclass_transform` ℹ Safe fix 87 87 | from typing_extensions import NamedTuple 88 88 | -89 89 | # OK: `typing_extensions` supports `frozen_default` (backported from 3.12). +89 89 | # UP035 on py312+ only: `typing_extensions` supports `frozen_default` (backported from 3.12). 90 |-from typing_extensions import dataclass_transform 90 |+from typing import dataclass_transform 91 91 | @@ -1080,6 +1120,8 @@ UP035.py:102:1: UP035 [*] Import from `types` instead: `get_original_bases` 101 | # UP035 102 | from typing_extensions import get_original_bases | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP035 +103 | +104 | # UP035 on py313+ only | = help: Import from `types` @@ -1089,5 +1131,61 @@ UP035.py:102:1: UP035 [*] Import from `types` instead: `get_original_bases` 101 101 | # UP035 102 |-from typing_extensions import get_original_bases 102 |+from types import get_original_bases +103 103 | +104 104 | # UP035 on py313+ only +105 105 | from typing_extensions import TypeVar + +UP035.py:105:1: UP035 [*] Import from `typing` instead: `TypeVar` + | +104 | # UP035 on py313+ only +105 | from typing_extensions import TypeVar + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP035 +106 | +107 | # UP035 on py313+ only + | + = help: Import from `typing` + +ℹ Safe fix +102 102 | from typing_extensions import get_original_bases +103 103 | +104 104 | # UP035 on py313+ only +105 |-from typing_extensions import TypeVar + 105 |+from typing import TypeVar +106 106 | +107 107 | # UP035 on py313+ only +108 108 | from typing_extensions import CapsuleType + +UP035.py:108:1: UP035 [*] Import from `types` instead: `CapsuleType` + | +107 | # UP035 on py313+ only +108 | from typing_extensions import CapsuleType + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP035 +109 | +110 | # UP035 on py313+ only + | + = help: Import from `types` +ℹ Safe fix +105 105 | from typing_extensions import TypeVar +106 106 | +107 107 | # UP035 on py313+ only +108 |-from typing_extensions import CapsuleType + 108 |+from types import CapsuleType +109 109 | +110 110 | # UP035 on py313+ only +111 111 | from typing_extensions import deprecated + +UP035.py:111:1: UP035 [*] Import from `warnings` instead: `deprecated` + | +110 | # UP035 on py313+ only +111 | from typing_extensions import deprecated + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP035 + | + = help: Import from `warnings` +ℹ Safe fix +108 108 | from typing_extensions import CapsuleType +109 109 | +110 110 | # UP035 on py313+ only +111 |-from typing_extensions import deprecated + 111 |+from warnings 
import deprecated From 712783825dc2da819b0c51b9dbc6e3675926c5ae Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:03:03 -0400 Subject: [PATCH 09/25] Update Rust crate strum_macros to v0.26.3 (#11701) --- Cargo.lock | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ee4a7ef6bc2c..431a1cf39031e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -350,7 +350,7 @@ version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn", @@ -868,12 +868,6 @@ dependencies = [ "allocator-api2", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -2758,11 +2752,11 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.26.2" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" +checksum = "f7993a8e3a9e88a00351486baae9522c91b123a088f76469e5bd5cc17198ea87" dependencies = [ - "heck 0.4.1", + "heck", "proc-macro2", "quote", "rustversion", From 25131da2c3c15cc8f68c76fdc52a3239a7e5c691 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:03:09 -0400 Subject: [PATCH 10/25] Update Rust crate toml to v0.8.13 (#11702) --- Cargo.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 431a1cf39031e..b6d9feafbf091 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2618,9 +2618,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.5" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" dependencies = [ "serde", ] @@ -2936,9 +2936,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "toml" -version = "0.8.12" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" +checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" dependencies = [ "serde", "serde_spanned", @@ -2948,18 +2948,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.5" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.12" +version = "0.22.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef" +checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" dependencies = [ "indexmap", "serde", From 010434015eb2f5348172a874a6151eb813f2ed1c Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 01:03:31 
+0000 Subject: [PATCH 11/25] Update Rust crate proc-macro2 to v1.0.85 (#11700) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b6d9feafbf091..8a4b30dd92c98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1664,9 +1664,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.84" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" dependencies = [ "unicode-ident", ] From ec3f52392424b87557c64ba7a6defe865209c96d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 01:04:26 +0000 Subject: [PATCH 12/25] Update Rust crate insta to v1.39.0 (#11705) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a4b30dd92c98..b9983f4b9b6f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1023,9 +1023,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.38.0" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3eab73f58e59ca6526037208f0e98851159ec1633cf17b6cd2e1f2c3fd5d53cc" +checksum = "810ae6042d48e2c9e9215043563a58a80b877bc863228a74cf10c49d4620a6f5" dependencies = [ "console", "globset", From 9599bd76228da00e9539da49a9dc4f799727fde4 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 01:05:17 +0000 Subject: [PATCH 13/25] Update Rust crate itertools to 0.13.0 (#11706) --- Cargo.lock | 28 ++++++++++++++-------------- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b9983f4b9b6f4..0a4107d83f779 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1110,9 +1110,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -1914,7 +1914,7 @@ dependencies = [ "insta", "insta-cmd", "is-macro", - "itertools 0.12.1", + "itertools 0.13.0", "log", "mimalloc", "notify", @@ -1976,7 +1976,7 @@ dependencies = [ "filetime", "glob", "globset", - "itertools 0.12.1", + "itertools 0.13.0", "regex", "ruff_macros", "seahash", @@ -1992,7 +1992,7 @@ dependencies = [ "imara-diff", "indicatif", "indoc", - "itertools 0.12.1", + "itertools 0.13.0", "libcst", "pretty_assertions", "rayon", @@ -2074,7 +2074,7 @@ dependencies = [ "insta", "is-macro", "is-wsl", - "itertools 0.12.1", + "itertools 0.13.0", "libcst", "log", "memchr", @@ -2122,7 +2122,7 @@ dependencies = [ name = "ruff_macros" version = "0.0.0" dependencies = [ - "itertools 0.12.1", + "itertools 0.13.0", "proc-macro2", "quote", "ruff_python_trivia", @@ -2134,7 +2134,7 @@ name = "ruff_notebook" version = "0.0.0" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.13.0", "once_cell", "rand", "ruff_diagnostics", @@ -2155,7 +2155,7 @@ dependencies = [ "aho-corasick", "bitflags 2.5.0", "is-macro", - "itertools 0.12.1", + "itertools 0.13.0", "once_cell", "ruff_python_trivia", "ruff_source_file", @@ -2194,7 +2194,7 @@ dependencies = [ "clap", "countme", "insta", - "itertools 0.12.1", + "itertools 0.13.0", "memchr", "once_cell", 
"regex", @@ -2235,7 +2235,7 @@ name = "ruff_python_literal" version = "0.0.0" dependencies = [ "bitflags 2.5.0", - "itertools 0.12.1", + "itertools 0.13.0", "ruff_python_ast", "unic-ucd-category", ] @@ -2250,7 +2250,7 @@ dependencies = [ "bstr", "insta", "is-macro", - "itertools 0.12.1", + "itertools 0.13.0", "memchr", "ruff_python_ast", "ruff_source_file", @@ -2299,7 +2299,7 @@ dependencies = [ name = "ruff_python_trivia" version = "0.0.0" dependencies = [ - "itertools 0.12.1", + "itertools 0.13.0", "ruff_source_file", "ruff_text_size", "unicode-ident", @@ -2406,7 +2406,7 @@ dependencies = [ "globset", "ignore", "is-macro", - "itertools 0.12.1", + "itertools 0.13.0", "log", "matchit", "path-absolutize", diff --git a/Cargo.toml b/Cargo.toml index b86df19d20fc4..e8bf342ce83e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,7 +72,7 @@ insta = { version = "1.35.1", feature = ["filters", "glob"] } insta-cmd = { version = "0.6.0" } is-macro = { version = "0.3.5" } is-wsl = { version = "0.4.0" } -itertools = { version = "0.12.1" } +itertools = { version = "0.13.0" } js-sys = { version = "0.3.69" } jod-thread = { version = "0.1.2" } libc = { version = "0.2.153" } From 436dc18b15b6a6c27bbc22a9b4c17be822970eeb Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 01:05:32 +0000 Subject: [PATCH 14/25] Update Rust crate libcst to v1.4.0 (#11707) --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0a4107d83f779..502843058ab41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1172,9 +1172,9 @@ checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libcst" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f1e25d1b119ab5c2f15a6e081bb94a8d547c5c2ad065f5fd0dbb683f31ced91" +checksum = "10293a04a48e8b0cb2cc825a93b83090e527bffd3c897a0255ad7bc96079e920" dependencies = [ "chic", "libcst_derive", @@ -1187,9 +1187,9 @@ dependencies = [ [[package]] name = "libcst_derive" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5011f2d59093de14a4a90e01b9d85dee9276e58a25f0107dcee167dd601be0" +checksum = "a2ae40017ac09cd2c6a53504cb3c871c7f2b41466eac5bc66ba63f39073b467b" dependencies = [ "quote", "syn", From ded010cf9c9eb91f6c981cbafeb051fdd6ce6774 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:51:13 -0400 Subject: [PATCH 15/25] Update Rust crate tracing-tree to v0.3.1 (#11703) --- Cargo.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 502843058ab41..0a8a6690d2e42 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1398,9 +1398,9 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.49.0" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c073d3c1930d0751774acf49e66653acecb416c3a54c6ec095a9b11caddb5a68" +checksum = "dd2800e1520bdc966782168a627aa5d1ad92e33b984bf7c7615d31280c83ff14" dependencies = [ "windows-sys 0.48.0", ] @@ -3044,11 +3044,11 @@ dependencies = [ [[package]] name = "tracing-tree" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65139ecd2c3f6484c3b99bc01c77afe21e95473630747c7aca525e78b0666675" +checksum = 
"b56c62d2c80033cb36fae448730a2f2ef99410fe3ecbffc916681a32f6807dbe" dependencies = [ - "nu-ansi-term 0.49.0", + "nu-ansi-term 0.50.0", "tracing-core", "tracing-log", "tracing-subscriber", From a9b6c4f2699742240393819c2b33586999d55d77 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:51:23 -0400 Subject: [PATCH 16/25] Update dependency monaco-editor to ^0.49.0 (#11710) --- playground/package-lock.json | 9 +++++---- playground/package.json | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/playground/package-lock.json b/playground/package-lock.json index eb7078f10651c..5ca1318b45b38 100644 --- a/playground/package-lock.json +++ b/playground/package-lock.json @@ -11,7 +11,7 @@ "@monaco-editor/react": "^4.4.6", "classnames": "^2.3.2", "lz-string": "^1.5.0", - "monaco-editor": "^0.48.0", + "monaco-editor": "^0.49.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-resizable-panels": "^2.0.0" @@ -3689,9 +3689,10 @@ } }, "node_modules/monaco-editor": { - "version": "0.48.0", - "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.48.0.tgz", - "integrity": "sha512-goSDElNqFfw7iDHMg8WDATkfcyeLTNpBHQpO8incK6p5qZt5G/1j41X0xdGzpIkGojGXM+QiRQyLjnfDVvrpwA==" + "version": "0.49.0", + "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.49.0.tgz", + "integrity": "sha512-2I8/T3X/hLxB2oPHgqcNYUVdA/ZEFShT7IAujifIPMfKkNbLOqY8XCoyHCXrsdjb36dW9MwoTwBCFpXKMwNwaQ==", + "license": "MIT" }, "node_modules/ms": { "version": "2.1.2", diff --git a/playground/package.json b/playground/package.json index e1228e6683c1f..820716049c5e6 100644 --- a/playground/package.json +++ b/playground/package.json @@ -18,7 +18,7 @@ "@monaco-editor/react": "^4.4.6", "classnames": "^2.3.2", "lz-string": "^1.5.0", - "monaco-editor": "^0.48.0", + "monaco-editor": "^0.49.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-resizable-panels": "^2.0.0" From 27085a93d9c59d55d03aa1abdb495c3e8d7d6c9d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:51:27 -0400 Subject: [PATCH 17/25] Update cloudflare/wrangler-action action to v3.6.1 (#11709) --- .github/workflows/docs.yaml | 2 +- .github/workflows/playground.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index de3816484439f..a0f31aba626da 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -47,7 +47,7 @@ jobs: run: mkdocs build --strict -f mkdocs.public.yml - name: "Deploy to Cloudflare Pages" if: ${{ env.CF_API_TOKEN_EXISTS == 'true' }} - uses: cloudflare/wrangler-action@v3.5.0 + uses: cloudflare/wrangler-action@v3.6.1 with: apiToken: ${{ secrets.CF_API_TOKEN }} accountId: ${{ secrets.CF_ACCOUNT_ID }} diff --git a/.github/workflows/playground.yaml b/.github/workflows/playground.yaml index 96bf4788f4bcd..a0128f7d3e226 100644 --- a/.github/workflows/playground.yaml +++ b/.github/workflows/playground.yaml @@ -40,7 +40,7 @@ jobs: working-directory: playground - name: "Deploy to Cloudflare Pages" if: ${{ env.CF_API_TOKEN_EXISTS == 'true' }} - uses: cloudflare/wrangler-action@v3.5.0 + uses: cloudflare/wrangler-action@v3.6.1 with: apiToken: ${{ secrets.CF_API_TOKEN }} accountId: ${{ secrets.CF_ACCOUNT_ID }} From 140c408a9206a2fad7e386a64ef6907621696a87 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:51:42 -0400 Subject: 
[PATCH 18/25] Update pre-commit dependencies (#11712) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f6b3341672a59..a62b4b95cf252 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ exclude: | repos: - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.17 + rev: v0.18 hooks: - id: validate-pyproject @@ -32,7 +32,7 @@ repos: )$ - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.40.0 + rev: v0.41.0 hooks: - id: markdownlint-fix exclude: | @@ -56,7 +56,7 @@ repos: pass_filenames: false # This makes it a lot faster - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.4 + rev: v0.4.7 hooks: - id: ruff-format - id: ruff From c69a789aa5dae5d4344f26381109883fb902d524 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 01:59:07 +0000 Subject: [PATCH 19/25] Update NPM Development dependencies (#11713) --- playground/api/package-lock.json | 83 ++++---- playground/api/package.json | 2 +- playground/package-lock.json | 333 ++++++++++++++++--------------- 3 files changed, 214 insertions(+), 204 deletions(-) diff --git a/playground/api/package-lock.json b/playground/api/package-lock.json index faae8a208beb6..ce9a0e430ca13 100644 --- a/playground/api/package-lock.json +++ b/playground/api/package-lock.json @@ -16,7 +16,7 @@ "@cloudflare/workers-types": "^4.20230801.0", "miniflare": "^3.20230801.1", "typescript": "^5.1.6", - "wrangler": "3.55.0" + "wrangler": "3.58.0" } }, "node_modules/@cloudflare/kv-asset-handler": { @@ -32,13 +32,14 @@ } }, "node_modules/@cloudflare/workerd-darwin-64": { - "version": "1.20240419.0", - "resolved": "https://registry.npmjs.org/@cloudflare/workerd-darwin-64/-/workerd-darwin-64-1.20240419.0.tgz", - "integrity": "sha512-PGVe9sYWULHfvGhN0IZh8MsskNG/ufnBSqPbgFCxJHCTrVXLPuC35EoVaforyqjKRwj3U35XMyGo9KHcGnTeHQ==", + "version": "1.20240524.0", + "resolved": "https://registry.npmjs.org/@cloudflare/workerd-darwin-64/-/workerd-darwin-64-1.20240524.0.tgz", + "integrity": "sha512-ATaXjefbTsrv4mpn4Fdua114RRDXcX5Ky+Mv+f4JTUllgalmqC4CYMN4jxRz9IpJU/fNMN8IEfvUyuJBAcl9Iw==", "cpu": [ "x64" ], "dev": true, + "license": "Apache-2.0", "optional": true, "os": [ "darwin" @@ -48,13 +49,14 @@ } }, "node_modules/@cloudflare/workerd-darwin-arm64": { - "version": "1.20240419.0", - "resolved": "https://registry.npmjs.org/@cloudflare/workerd-darwin-arm64/-/workerd-darwin-arm64-1.20240419.0.tgz", - "integrity": "sha512-z4etQSPiD5Gcjs962LiC7ZdmXnN6SGof5KrYoFiSI9X9kUvpuGH/lnjVVPd+NnVNeDU2kzmcAIgyZjkjTaqVXQ==", + "version": "1.20240524.0", + "resolved": "https://registry.npmjs.org/@cloudflare/workerd-darwin-arm64/-/workerd-darwin-arm64-1.20240524.0.tgz", + "integrity": "sha512-wnbsZI4CS0QPCd+wnBHQ40C28A/2Qo4ESi1YhE2735G3UNcc876MWksZhsubd+XH0XPIra6eNFqyw6wRMpQOXA==", "cpu": [ "arm64" ], "dev": true, + "license": "Apache-2.0", "optional": true, "os": [ "darwin" @@ -64,13 +66,14 @@ } }, "node_modules/@cloudflare/workerd-linux-64": { - "version": "1.20240419.0", - "resolved": "https://registry.npmjs.org/@cloudflare/workerd-linux-64/-/workerd-linux-64-1.20240419.0.tgz", - "integrity": "sha512-lBwhg0j3sYTFMsEb4bOClbVje8nqrYOu0H3feQlX+Eks94JIhWPkf8ywK4at/BUc1comPMhCgzDHwc2OMPUGgg==", + "version": "1.20240524.0", + "resolved": "https://registry.npmjs.org/@cloudflare/workerd-linux-64/-/workerd-linux-64-1.20240524.0.tgz", + "integrity": 
"sha512-E8mj+HPBryKwaJAiNsYzXtVjKCL0KvUBZbtxJxlWM4mLSQhT+uwGT3nydb/hFY59rZnQgZslw0oqEWht5TEYiQ==", "cpu": [ "x64" ], "dev": true, + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -80,13 +83,14 @@ } }, "node_modules/@cloudflare/workerd-linux-arm64": { - "version": "1.20240419.0", - "resolved": "https://registry.npmjs.org/@cloudflare/workerd-linux-arm64/-/workerd-linux-arm64-1.20240419.0.tgz", - "integrity": "sha512-ZMY6wwWkxL+WPq8ydOp/irSYjAnMhBz1OC1+4z+OANtDs2beaZODmq7LEB3hb5WUAaTPY7DIjZh3DfDfty0nYg==", + "version": "1.20240524.0", + "resolved": "https://registry.npmjs.org/@cloudflare/workerd-linux-arm64/-/workerd-linux-arm64-1.20240524.0.tgz", + "integrity": "sha512-/Fr1W671t2triNCDCBWdStxngnbUfZunZ/2e4kaMLzJDJLYDtYdmvOUCBDzUD4ssqmIMbn9RCQQ0U+CLEoqBqw==", "cpu": [ "arm64" ], "dev": true, + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -96,13 +100,14 @@ } }, "node_modules/@cloudflare/workerd-windows-64": { - "version": "1.20240419.0", - "resolved": "https://registry.npmjs.org/@cloudflare/workerd-windows-64/-/workerd-windows-64-1.20240419.0.tgz", - "integrity": "sha512-YJjgaJN2yGTkV7Cr4K3i8N4dUwVQTclT3Pr3NpRZCcLjTszwlE53++XXDnHMKGXBbSguIizaVbmcU2EtmIXyeQ==", + "version": "1.20240524.0", + "resolved": "https://registry.npmjs.org/@cloudflare/workerd-windows-64/-/workerd-windows-64-1.20240524.0.tgz", + "integrity": "sha512-G+ThDEx57g9mAEKqhWnHaaJgpeGYtyhkmwM/BDpLqPks/rAY5YEfZbY4YL1pNk1kkcZDXGrwIsY8xe9Apf5JdA==", "cpu": [ "x64" ], "dev": true, + "license": "Apache-2.0", "optional": true, "os": [ "win32" @@ -112,10 +117,11 @@ } }, "node_modules/@cloudflare/workers-types": { - "version": "4.20240502.0", - "resolved": "https://registry.npmjs.org/@cloudflare/workers-types/-/workers-types-4.20240502.0.tgz", - "integrity": "sha512-OB1jIyPOzyOcuZFHWhsQnkRLN6u8+jmU9X3T4KZlGgn3Ivw8pBiswhLOp+yFeChR3Y4/5+V0hPFRko5SReordg==", - "dev": true + "version": "4.20240529.0", + "resolved": "https://registry.npmjs.org/@cloudflare/workers-types/-/workers-types-4.20240529.0.tgz", + "integrity": "sha512-W5obfjAwCNdYk3feUHtDfUxtTU6WIq83k6gmrLLJv+HkgCkOTwwrDNs+3w1Qln0tMj+FQx/fbwxw3ZuHIoyzGg==", + "dev": true, + "license": "MIT OR Apache-2.0" }, "node_modules/@cspotcode/source-map-support": { "version": "0.8.1", @@ -1070,10 +1076,11 @@ } }, "node_modules/miniflare": { - "version": "3.20240419.1", - "resolved": "https://registry.npmjs.org/miniflare/-/miniflare-3.20240419.1.tgz", - "integrity": "sha512-Q9n0W07uUD/u0c/b03E4iogeXOAMjZnE3P7B5Yi8sPaZAx6TYWwjurGBja+Pg2yILN2iMaliEobfVyAKss33cA==", + "version": "3.20240524.1", + "resolved": "https://registry.npmjs.org/miniflare/-/miniflare-3.20240524.1.tgz", + "integrity": "sha512-5d3pRxvd5pT7lX1SsBH9+AjXuyHJnChSNOnYhubfi7pxMek4ZfULwhnUmNUp1R7b2xKuzqdFDZa0fsZuUoFxlw==", "dev": true, + "license": "MIT", "dependencies": { "@cspotcode/source-map-support": "0.8.1", "acorn": "^8.8.0", @@ -1083,7 +1090,7 @@ "glob-to-regexp": "^0.4.1", "stoppable": "^1.1.0", "undici": "^5.28.2", - "workerd": "1.20240419.0", + "workerd": "1.20240524.0", "ws": "^8.11.0", "youch": "^3.2.2", "zod": "^3.20.6" @@ -1496,11 +1503,12 @@ } }, "node_modules/workerd": { - "version": "1.20240419.0", - "resolved": "https://registry.npmjs.org/workerd/-/workerd-1.20240419.0.tgz", - "integrity": "sha512-9yV98KpkQgG+bdEsKEW8i1AYZgxns6NVSfdOVEB2Ue1pTMtIEYfUyqUE+O2amisRrfaC3Pw4EvjtTmVaoetfeg==", + "version": "1.20240524.0", + "resolved": "https://registry.npmjs.org/workerd/-/workerd-1.20240524.0.tgz", + "integrity": 
"sha512-LWLe5D8PVHBcqturmBbwgI71r7YPpIMYZoVEH6S4G35EqIJ55cb0n3FipoSyraoIfpcCxCFxX1K6WsRHbP3pFA==", "dev": true, "hasInstallScript": true, + "license": "Apache-2.0", "bin": { "workerd": "bin/workerd" }, @@ -1508,18 +1516,19 @@ "node": ">=16" }, "optionalDependencies": { - "@cloudflare/workerd-darwin-64": "1.20240419.0", - "@cloudflare/workerd-darwin-arm64": "1.20240419.0", - "@cloudflare/workerd-linux-64": "1.20240419.0", - "@cloudflare/workerd-linux-arm64": "1.20240419.0", - "@cloudflare/workerd-windows-64": "1.20240419.0" + "@cloudflare/workerd-darwin-64": "1.20240524.0", + "@cloudflare/workerd-darwin-arm64": "1.20240524.0", + "@cloudflare/workerd-linux-64": "1.20240524.0", + "@cloudflare/workerd-linux-arm64": "1.20240524.0", + "@cloudflare/workerd-windows-64": "1.20240524.0" } }, "node_modules/wrangler": { - "version": "3.55.0", - "resolved": "https://registry.npmjs.org/wrangler/-/wrangler-3.55.0.tgz", - "integrity": "sha512-VhtCioKxOdVqkHa8jQ6C6bX3by2Ko0uM0DKzrA+6lBZvfDUlGDWSOPiG+1fOHBHj2JTVBntxWCztXP6L+Udr8w==", + "version": "3.58.0", + "resolved": "https://registry.npmjs.org/wrangler/-/wrangler-3.58.0.tgz", + "integrity": "sha512-h9gWER7LXLnmHABDNP1p3aqXtchlvSBN8Dp22ZurnkxaLMZ3L3H1Ze1ftiFSs0VRWv0BUnz7AWIUqZmzuBY4Nw==", "dev": true, + "license": "MIT OR Apache-2.0", "dependencies": { "@cloudflare/kv-asset-handler": "0.3.2", "@esbuild-plugins/node-globals-polyfill": "^0.2.3", @@ -1527,7 +1536,7 @@ "blake3-wasm": "^2.1.5", "chokidar": "^3.5.3", "esbuild": "0.17.19", - "miniflare": "3.20240419.1", + "miniflare": "3.20240524.1", "nanoid": "^3.3.3", "path-to-regexp": "^6.2.0", "resolve": "^1.22.8", @@ -1547,7 +1556,7 @@ "fsevents": "~2.3.2" }, "peerDependencies": { - "@cloudflare/workers-types": "^4.20240419.0" + "@cloudflare/workers-types": "^4.20240524.0" }, "peerDependenciesMeta": { "@cloudflare/workers-types": { diff --git a/playground/api/package.json b/playground/api/package.json index cc61a486fc0d9..9bb8ee4de2e47 100644 --- a/playground/api/package.json +++ b/playground/api/package.json @@ -5,7 +5,7 @@ "@cloudflare/workers-types": "^4.20230801.0", "miniflare": "^3.20230801.1", "typescript": "^5.1.6", - "wrangler": "3.55.0" + "wrangler": "3.58.0" }, "private": true, "scripts": { diff --git a/playground/package-lock.json b/playground/package-lock.json index 5ca1318b45b38..e625bf8ce2cd8 100644 --- a/playground/package-lock.json +++ b/playground/package-lock.json @@ -809,14 +809,15 @@ ] }, "node_modules/@swc/core": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core/-/core-1.4.11.tgz", - "integrity": "sha512-WKEakMZxkVwRdgMN4AMJ9K5nysY8g8npgQPczmjBeNK5In7QEAZAJwnyccrWwJZU0XjVeHn2uj+XbOKdDW17rg==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core/-/core-1.5.24.tgz", + "integrity": "sha512-Eph9zvO4xvqWZGVzTdtdEJ0Vqf0VIML/o/e4Qd2RLOqtfgnlRi7avmMu5C0oqciJ0tk+hqdUKVUZ4JPoPaiGvQ==", "dev": true, "hasInstallScript": true, + "license": "Apache-2.0", "dependencies": { - "@swc/counter": "^0.1.2", - "@swc/types": "^0.1.5" + "@swc/counter": "^0.1.3", + "@swc/types": "^0.1.7" }, "engines": { "node": ">=10" @@ -826,19 +827,19 @@ "url": "https://opencollective.com/swc" }, "optionalDependencies": { - "@swc/core-darwin-arm64": "1.4.11", - "@swc/core-darwin-x64": "1.4.11", - "@swc/core-linux-arm-gnueabihf": "1.4.11", - "@swc/core-linux-arm64-gnu": "1.4.11", - "@swc/core-linux-arm64-musl": "1.4.11", - "@swc/core-linux-x64-gnu": "1.4.11", - "@swc/core-linux-x64-musl": "1.4.11", - "@swc/core-win32-arm64-msvc": "1.4.11", - "@swc/core-win32-ia32-msvc": 
"1.4.11", - "@swc/core-win32-x64-msvc": "1.4.11" + "@swc/core-darwin-arm64": "1.5.24", + "@swc/core-darwin-x64": "1.5.24", + "@swc/core-linux-arm-gnueabihf": "1.5.24", + "@swc/core-linux-arm64-gnu": "1.5.24", + "@swc/core-linux-arm64-musl": "1.5.24", + "@swc/core-linux-x64-gnu": "1.5.24", + "@swc/core-linux-x64-musl": "1.5.24", + "@swc/core-win32-arm64-msvc": "1.5.24", + "@swc/core-win32-ia32-msvc": "1.5.24", + "@swc/core-win32-x64-msvc": "1.5.24" }, "peerDependencies": { - "@swc/helpers": "^0.5.0" + "@swc/helpers": "*" }, "peerDependenciesMeta": { "@swc/helpers": { @@ -847,13 +848,14 @@ } }, "node_modules/@swc/core-darwin-arm64": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-darwin-arm64/-/core-darwin-arm64-1.4.11.tgz", - "integrity": "sha512-C1j1Qp/IHSelVWdEnT7f0iONWxQz6FAqzjCF2iaL+0vFg4V5f2nlgrueY8vj5pNNzSGhrAlxsMxEIp4dj1MXkg==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-darwin-arm64/-/core-darwin-arm64-1.5.24.tgz", + "integrity": "sha512-M7oLOcC0sw+UTyAuL/9uyB9GeO4ZpaBbH76JSH6g1m0/yg7LYJZGRmplhDmwVSDAR5Fq4Sjoi1CksmmGkgihGA==", "cpu": [ "arm64" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ "darwin" @@ -863,13 +865,14 @@ } }, "node_modules/@swc/core-darwin-x64": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-darwin-x64/-/core-darwin-x64-1.4.11.tgz", - "integrity": "sha512-0TTy3Ni8ncgaMCchSQ7FK8ZXQLlamy0FXmGWbR58c+pVZWYZltYPTmheJUvVcR0H2+gPAymRKyfC0iLszDALjg==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-darwin-x64/-/core-darwin-x64-1.5.24.tgz", + "integrity": "sha512-MfcFjGGYognpSBSos2pYUNYJSmqEhuw5ceGr6qAdME7ddbjGXliza4W6FggsM+JnWwpqa31+e7/R+GetW4WkaQ==", "cpu": [ "x64" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ "darwin" @@ -879,13 +882,14 @@ } }, "node_modules/@swc/core-linux-arm-gnueabihf": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.4.11.tgz", - "integrity": "sha512-XJLB71uw0rog4DjYAPxFGAuGCBQpgJDlPZZK6MTmZOvI/1t0+DelJ24IjHIxk500YYM26Yv47xPabqFPD7I2zQ==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.5.24.tgz", + "integrity": "sha512-amI2pwtcWV3E/m/nf+AQtn1LWDzKLZyjCmWd3ms7QjEueWYrY8cU1Y4Wp7wNNsxIoPOi8zek1Uj2wwFD/pttNQ==", "cpu": [ "arm" ], "dev": true, + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -895,13 +899,14 @@ } }, "node_modules/@swc/core-linux-arm64-gnu": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.4.11.tgz", - "integrity": "sha512-vYQwzJvm/iu052d5Iw27UFALIN5xSrGkPZXxLNMHPySVko2QMNNBv35HLatkEQHbQ3X+VKSW9J9SkdtAvAVRAQ==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.5.24.tgz", + "integrity": "sha512-sTSvmqMmgT1ynH/nP75Pc51s+iT4crZagHBiDOf5cq+kudUYjda9lWMs7xkXB/TUKFHPCRK0HGunl8bkwiIbuw==", "cpu": [ "arm64" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ "linux" @@ -911,13 +916,14 @@ } }, "node_modules/@swc/core-linux-arm64-musl": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.4.11.tgz", - "integrity": "sha512-eV+KduiRYUFjPsvbZuJ9aknQH9Tj0U2/G9oIZSzLx/18WsYi+upzHbgxmIIHJ2VJgfd7nN40RI/hMtxNsUzR/g==", + "version": "1.5.24", + "resolved": 
"https://registry.npmjs.org/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.5.24.tgz", + "integrity": "sha512-vd2/hfOBGbrX21FxsFdXCUaffjkHvlZkeE2UMRajdXifwv79jqOHIJg3jXG1F3ZrhCghCzirFts4tAZgcG8XWg==", "cpu": [ "arm64" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ "linux" @@ -927,13 +933,14 @@ } }, "node_modules/@swc/core-linux-x64-gnu": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.4.11.tgz", - "integrity": "sha512-WA1iGXZ2HpqM1OR9VCQZJ8sQ1KP2or9O4bO8vWZo6HZJIeoQSo7aa9waaCLRpkZvkng1ct/TF/l6ymqSNFXIzQ==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.5.24.tgz", + "integrity": "sha512-Zrdzi7NqzQxm2BvAG5KyOSBEggQ7ayrxh599AqqevJmsUXJ8o2nMiWQOBvgCGp7ye+Biz3pvZn1EnRzAp+TpUg==", "cpu": [ "x64" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ "linux" @@ -943,13 +950,14 @@ } }, "node_modules/@swc/core-linux-x64-musl": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.4.11.tgz", - "integrity": "sha512-UkVJToKf0owwQYRnGvjHAeYVDfeimCEcx0VQSbJoN7Iy0ckRZi7YPlmWJU31xtKvikE2bQWCOVe0qbSDqqcWXA==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.5.24.tgz", + "integrity": "sha512-1F8z9NRi52jdZQCGc5sflwYSctL6omxiVmIFVp8TC9nngjQKc00TtX/JC2Eo2HwvgupkFVl5YQJidAck9YtmJw==", "cpu": [ "x64" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ "linux" @@ -959,13 +967,14 @@ } }, "node_modules/@swc/core-win32-arm64-msvc": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.4.11.tgz", - "integrity": "sha512-35khwkyly7lF5NDSyvIrukBMzxPorgc5iTSDfVO/LvnmN5+fm4lTlrDr4tUfTdOhv3Emy7CsKlsNAeFRJ+Pm+w==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.5.24.tgz", + "integrity": "sha512-cKpP7KvS6Xr0jFSTBXY53HZX/YfomK5EMQYpCVDOvfsZeYHN20sQSKXfpVLvA/q2igVt1zzy1XJcOhpJcgiKLg==", "cpu": [ "arm64" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ "win32" @@ -975,13 +984,14 @@ } }, "node_modules/@swc/core-win32-ia32-msvc": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.4.11.tgz", - "integrity": "sha512-Wx8/6f0ufgQF2pbVPsJ2dAmFLwIOW+xBE5fxnb7VnEbGkTgP1qMDWiiAtD9rtvDSuODG3i1AEmAak/2HAc6i6A==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.5.24.tgz", + "integrity": "sha512-IoPWfi0iwqjZuf7gE223+B97/ZwkKbu7qL5KzGP7g3hJrGSKAvv7eC5Y9r2iKKtLKyv5R/T6Ho0kFR/usi7rHw==", "cpu": [ "ia32" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ "win32" @@ -991,13 +1001,14 @@ } }, "node_modules/@swc/core-win32-x64-msvc": { - "version": "1.4.11", - "resolved": "https://registry.npmjs.org/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.4.11.tgz", - "integrity": "sha512-0xRFW6K9UZQH2NVC/0pVB0GJXS45lY24f+6XaPBF1YnMHd8A8GoHl7ugyM5yNUTe2AKhSgk5fJV00EJt/XBtdQ==", + "version": "1.5.24", + "resolved": "https://registry.npmjs.org/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.5.24.tgz", + "integrity": "sha512-zHgF2k1uVJL8KIW+PnVz1To4a3Cz9THbh2z2lbehaF/gKHugH4c3djBozU4das1v35KOqf5jWIEviBLql2wDLQ==", "cpu": [ "x64" ], "dev": true, + "license": "Apache-2.0 AND MIT", "optional": true, "os": [ 
"win32" @@ -1010,13 +1021,15 @@ "version": "0.1.3", "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==", - "dev": true + "dev": true, + "license": "Apache-2.0" }, "node_modules/@swc/types": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/@swc/types/-/types-0.1.6.tgz", - "integrity": "sha512-/JLo/l2JsT/LRd80C3HfbmVpxOAJ11FO2RCEslFrgzLltoP9j8XIbsyDcfCt2WWyX+CM96rBoNM+IToAkFOugg==", + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/@swc/types/-/types-0.1.7.tgz", + "integrity": "sha512-scHWahbHF0eyj3JsxG9CFJgFdFNaVQCNAimBlT6PzS3n/HptxqREjsm4OH6AN3lYcffZYSPxXW8ua2BEHp0lJQ==", "dev": true, + "license": "Apache-2.0", "dependencies": { "@swc/counter": "^0.1.3" } @@ -1027,12 +1040,6 @@ "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==", "dev": true }, - "node_modules/@types/json-schema": { - "version": "7.0.15", - "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", - "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", - "dev": true - }, "node_modules/@types/json5": { "version": "0.0.29", "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", @@ -1046,10 +1053,11 @@ "dev": true }, "node_modules/@types/react": { - "version": "18.3.2", - "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.2.tgz", - "integrity": "sha512-Btgg89dAnqD4vV7R3hlwOxgqobUQKgx3MmrQRi0yYbs/P0ym8XozIAlkqVilPqHQwXs4e9Tf63rrCgl58BcO4w==", + "version": "18.3.3", + "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.3.tgz", + "integrity": "sha512-hti/R0pS0q1/xx+TsI73XIqk26eBsISZ2R0wUijXIngRK9R/e7Xw/cXVxQK7R5JjW+SV4zGcn5hXjudkN/pLIw==", "dev": true, + "license": "MIT", "dependencies": { "@types/prop-types": "*", "csstype": "^3.0.2" @@ -1064,28 +1072,21 @@ "@types/react": "*" } }, - "node_modules/@types/semver": { - "version": "7.5.8", - "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.8.tgz", - "integrity": "sha512-I8EUhyrgfLrcTkzV3TSsGyl1tSuPrEDzr0yd5m90UgNxQkyDXULk3b6MlQqTCpZpNtWe1K0hzclnZkTcLBe2UQ==", - "dev": true - }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "7.8.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.8.0.tgz", - "integrity": "sha512-gFTT+ezJmkwutUPmB0skOj3GZJtlEGnlssems4AjkVweUPGj7jRwwqg0Hhg7++kPGJqKtTYx+R05Ftww372aIg==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.11.0.tgz", + "integrity": "sha512-P+qEahbgeHW4JQ/87FuItjBj8O3MYv5gELDzr8QaQ7fsll1gSMTYb6j87MYyxwf3DtD7uGFB9ShwgmCJB5KmaQ==", "dev": true, + "license": "MIT", "dependencies": { "@eslint-community/regexpp": "^4.10.0", - "@typescript-eslint/scope-manager": "7.8.0", - "@typescript-eslint/type-utils": "7.8.0", - "@typescript-eslint/utils": "7.8.0", - "@typescript-eslint/visitor-keys": "7.8.0", - "debug": "^4.3.4", + "@typescript-eslint/scope-manager": "7.11.0", + "@typescript-eslint/type-utils": "7.11.0", + "@typescript-eslint/utils": "7.11.0", + "@typescript-eslint/visitor-keys": "7.11.0", "graphemer": "^1.4.0", "ignore": "^5.3.1", "natural-compare": "^1.4.0", - "semver": "^7.6.0", "ts-api-utils": "^1.3.0" }, "engines": { @@ -1106,15 +1107,16 @@ } }, "node_modules/@typescript-eslint/parser": { - "version": "7.8.0", - "resolved": 
"https://registry.npmjs.org/@typescript-eslint/parser/-/parser-7.8.0.tgz", - "integrity": "sha512-KgKQly1pv0l4ltcftP59uQZCi4HUYswCLbTqVZEJu7uLX8CTLyswqMLqLN+2QFz4jCptqWVV4SB7vdxcH2+0kQ==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-7.11.0.tgz", + "integrity": "sha512-yimw99teuaXVWsBcPO1Ais02kwJ1jmNA1KxE7ng0aT7ndr1pT1wqj0OJnsYVGKKlc4QJai86l/025L6z8CljOg==", "dev": true, + "license": "BSD-2-Clause", "dependencies": { - "@typescript-eslint/scope-manager": "7.8.0", - "@typescript-eslint/types": "7.8.0", - "@typescript-eslint/typescript-estree": "7.8.0", - "@typescript-eslint/visitor-keys": "7.8.0", + "@typescript-eslint/scope-manager": "7.11.0", + "@typescript-eslint/types": "7.11.0", + "@typescript-eslint/typescript-estree": "7.11.0", + "@typescript-eslint/visitor-keys": "7.11.0", "debug": "^4.3.4" }, "engines": { @@ -1134,13 +1136,14 @@ } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "7.8.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-7.8.0.tgz", - "integrity": "sha512-viEmZ1LmwsGcnr85gIq+FCYI7nO90DVbE37/ll51hjv9aG+YZMb4WDE2fyWpUR4O/UrhGRpYXK/XajcGTk2B8g==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-7.11.0.tgz", + "integrity": "sha512-27tGdVEiutD4POirLZX4YzT180vevUURJl4wJGmm6TrQoiYwuxTIY98PBp6L2oN+JQxzE0URvYlzJaBHIekXAw==", "dev": true, + "license": "MIT", "dependencies": { - "@typescript-eslint/types": "7.8.0", - "@typescript-eslint/visitor-keys": "7.8.0" + "@typescript-eslint/types": "7.11.0", + "@typescript-eslint/visitor-keys": "7.11.0" }, "engines": { "node": "^18.18.0 || >=20.0.0" @@ -1151,13 +1154,14 @@ } }, "node_modules/@typescript-eslint/type-utils": { - "version": "7.8.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-7.8.0.tgz", - "integrity": "sha512-H70R3AefQDQpz9mGv13Uhi121FNMh+WEaRqcXTX09YEDky21km4dV1ZXJIp8QjXc4ZaVkXVdohvWDzbnbHDS+A==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-7.11.0.tgz", + "integrity": "sha512-WmppUEgYy+y1NTseNMJ6mCFxt03/7jTOy08bcg7bxJJdsM4nuhnchyBbE8vryveaJUf62noH7LodPSo5Z0WUCg==", "dev": true, + "license": "MIT", "dependencies": { - "@typescript-eslint/typescript-estree": "7.8.0", - "@typescript-eslint/utils": "7.8.0", + "@typescript-eslint/typescript-estree": "7.11.0", + "@typescript-eslint/utils": "7.11.0", "debug": "^4.3.4", "ts-api-utils": "^1.3.0" }, @@ -1178,10 +1182,11 @@ } }, "node_modules/@typescript-eslint/types": { - "version": "7.8.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-7.8.0.tgz", - "integrity": "sha512-wf0peJ+ZGlcH+2ZS23aJbOv+ztjeeP8uQ9GgwMJGVLx/Nj9CJt17GWgWWoSmoRVKAX2X+7fzEnAjxdvK2gqCLw==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-7.11.0.tgz", + "integrity": "sha512-MPEsDRZTyCiXkD4vd3zywDCifi7tatc4K37KqTprCvaXptP7Xlpdw0NR2hRJTetG5TxbWDB79Ys4kLmHliEo/w==", "dev": true, + "license": "MIT", "engines": { "node": "^18.18.0 || >=20.0.0" }, @@ -1191,13 +1196,14 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "7.8.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-7.8.0.tgz", - "integrity": "sha512-5pfUCOwK5yjPaJQNy44prjCwtr981dO8Qo9J9PwYXZ0MosgAbfEMB008dJ5sNo3+/BN6ytBPuSvXUg9SAqB0dg==", + "version": "7.11.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-7.11.0.tgz", + "integrity": "sha512-cxkhZ2C/iyi3/6U9EPc5y+a6csqHItndvN/CzbNXTNrsC3/ASoYQZEt9uMaEp+xFNjasqQyszp5TumAVKKvJeQ==", "dev": true, + "license": "BSD-2-Clause", "dependencies": { - "@typescript-eslint/types": "7.8.0", - "@typescript-eslint/visitor-keys": "7.8.0", + "@typescript-eslint/types": "7.11.0", + "@typescript-eslint/visitor-keys": "7.11.0", "debug": "^4.3.4", "globby": "^11.1.0", "is-glob": "^4.0.3", @@ -1223,6 +1229,7 @@ "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", "dev": true, + "license": "MIT", "dependencies": { "balanced-match": "^1.0.0" } @@ -1232,6 +1239,7 @@ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.4.tgz", "integrity": "sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw==", "dev": true, + "license": "ISC", "dependencies": { "brace-expansion": "^2.0.1" }, @@ -1243,18 +1251,16 @@ } }, "node_modules/@typescript-eslint/utils": { - "version": "7.8.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-7.8.0.tgz", - "integrity": "sha512-L0yFqOCflVqXxiZyXrDr80lnahQfSOfc9ELAAZ75sqicqp2i36kEZZGuUymHNFoYOqxRT05up760b4iGsl02nQ==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-7.11.0.tgz", + "integrity": "sha512-xlAWwPleNRHwF37AhrZurOxA1wyXowW4PqVXZVUNCLjB48CqdPJoJWkrpH2nij9Q3Lb7rtWindtoXwxjxlKKCA==", "dev": true, + "license": "MIT", "dependencies": { "@eslint-community/eslint-utils": "^4.4.0", - "@types/json-schema": "^7.0.15", - "@types/semver": "^7.5.8", - "@typescript-eslint/scope-manager": "7.8.0", - "@typescript-eslint/types": "7.8.0", - "@typescript-eslint/typescript-estree": "7.8.0", - "semver": "^7.6.0" + "@typescript-eslint/scope-manager": "7.11.0", + "@typescript-eslint/types": "7.11.0", + "@typescript-eslint/typescript-estree": "7.11.0" }, "engines": { "node": "^18.18.0 || >=20.0.0" @@ -1268,12 +1274,13 @@ } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "7.8.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-7.8.0.tgz", - "integrity": "sha512-q4/gibTNBQNA0lGyYQCmWRS5D15n8rXh4QjK3KV+MBPlTYHpfBUT3D3PaPR/HeNiI9W6R7FvlkcGhNyAoP+caA==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-7.11.0.tgz", + "integrity": "sha512-7syYk4MzjxTEk0g/w3iqtgxnFQspDJfn6QKD36xMuuhTzjcxY7F8EmBLnALjVyaOF1/bVocu3bS/2/F7rXrveQ==", "dev": true, + "license": "MIT", "dependencies": { - "@typescript-eslint/types": "7.8.0", + "@typescript-eslint/types": "7.11.0", "eslint-visitor-keys": "^3.4.3" }, "engines": { @@ -1291,12 +1298,13 @@ "dev": true }, "node_modules/@vitejs/plugin-react-swc": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.6.0.tgz", - "integrity": "sha512-XFRbsGgpGxGzEV5i5+vRiro1bwcIaZDIdBRP16qwm+jP68ue/S8FJTBEgOeojtVDYrbSua3XFp71kC8VJE6v+g==", + "version": "3.7.0", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.0.tgz", + "integrity": "sha512-yrknSb3Dci6svCd/qhHqhFPDSw0QtjumcqdKMoNNzmOl5lMXTTiqzjWtG4Qask2HdvvzaNgSunbQGet8/GrKdA==", "dev": true, + "license": "MIT", "dependencies": { - "@swc/core": "^1.3.107" + "@swc/core": "^1.5.7" }, "peerDependencies": { "vite": "^4 || ^5" @@ 
-1435,6 +1443,7 @@ "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" } @@ -1984,6 +1993,7 @@ "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", "dev": true, + "license": "MIT", "dependencies": { "path-type": "^4.0.0" }, @@ -2016,10 +2026,11 @@ "dev": true }, "node_modules/es-abstract": { - "version": "1.23.2", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.2.tgz", - "integrity": "sha512-60s3Xv2T2p1ICykc7c+DNDPLDMm9t4QxCOUU0K9JxiLjM3C1zB9YVdN7tjxrFd4+AkZ8CdX1ovUga4P2+1e+/w==", + "version": "1.23.3", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.3.tgz", + "integrity": "sha512-e+HfNH61Bj1X9/jLc5v1owaLYuHdeHHSQlkhCBiTK8rBvKaULl/beGMxwrMXjpYrv4pz22BlY570vVePA2ho4A==", "dev": true, + "license": "MIT", "dependencies": { "array-buffer-byte-length": "^1.0.1", "arraybuffer.prototype.slice": "^1.0.3", @@ -2060,11 +2071,11 @@ "safe-regex-test": "^1.0.3", "string.prototype.trim": "^1.2.9", "string.prototype.trimend": "^1.0.8", - "string.prototype.trimstart": "^1.0.7", + "string.prototype.trimstart": "^1.0.8", "typed-array-buffer": "^1.0.2", "typed-array-byte-length": "^1.0.1", "typed-array-byte-offset": "^1.0.2", - "typed-array-length": "^1.0.5", + "typed-array-length": "^1.0.6", "unbox-primitive": "^1.0.2", "which-typed-array": "^1.1.15" }, @@ -2097,14 +2108,15 @@ } }, "node_modules/es-iterator-helpers": { - "version": "1.0.18", - "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.18.tgz", - "integrity": "sha512-scxAJaewsahbqTYrGKJihhViaM6DDZDDoucfvzNbK0pOren1g/daDQ3IAhzn+1G14rBG7w+i5N+qul60++zlKA==", + "version": "1.0.19", + "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.19.tgz", + "integrity": "sha512-zoMwbCcH5hwUkKJkT8kDIBZSz9I6mVG//+lDCinLCGov4+r7NIy0ld8o03M0cJxl2spVf6ESYVS6/gpIfq1FFw==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", - "es-abstract": "^1.23.0", + "es-abstract": "^1.23.3", "es-errors": "^1.3.0", "es-set-tostringtag": "^2.0.3", "function-bind": "^1.1.2", @@ -2454,29 +2466,30 @@ } }, "node_modules/eslint-plugin-react": { - "version": "7.34.1", - "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.34.1.tgz", - "integrity": "sha512-N97CxlouPT1AHt8Jn0mhhN2RrADlUAsk1/atcT2KyA/l9Q/E6ll7OIGwNumFmWfZ9skV3XXccYS19h80rHtgkw==", + "version": "7.34.2", + "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.34.2.tgz", + "integrity": "sha512-2HCmrU+/JNigDN6tg55cRDKCQWicYAPB38JGSFDQt95jDm8rrvSUo7YPkOIm5l6ts1j1zCvysNcasvfTMQzUOw==", "dev": true, + "license": "MIT", "dependencies": { - "array-includes": "^3.1.7", - "array.prototype.findlast": "^1.2.4", + "array-includes": "^3.1.8", + "array.prototype.findlast": "^1.2.5", "array.prototype.flatmap": "^1.3.2", "array.prototype.toreversed": "^1.1.2", "array.prototype.tosorted": "^1.1.3", "doctrine": "^2.1.0", - "es-iterator-helpers": "^1.0.17", + "es-iterator-helpers": "^1.0.19", "estraverse": "^5.3.0", "jsx-ast-utils": "^2.4.1 || ^3.0.0", "minimatch": "^3.1.2", - "object.entries": "^1.1.7", - "object.fromentries": "^2.0.7", - "object.hasown": "^1.1.3", - "object.values": "^1.1.7", 
+ "object.entries": "^1.1.8", + "object.fromentries": "^2.0.8", + "object.hasown": "^1.1.4", + "object.values": "^1.2.0", "prop-types": "^15.8.1", "resolve": "^2.0.0-next.5", "semver": "^6.3.1", - "string.prototype.matchall": "^4.0.10" + "string.prototype.matchall": "^4.0.11" }, "engines": { "node": ">=4" @@ -2502,6 +2515,7 @@ "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", "dev": true, + "license": "Apache-2.0", "dependencies": { "esutils": "^2.0.2" }, @@ -2514,6 +2528,7 @@ "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.5.tgz", "integrity": "sha512-U7WjGVG9sH8tvjW5SmGbQuui75FiyjAX72HX15DwBBwF9dNiQZRQAg9nnPhYy+TUnE0+VcrttuvNI8oSxZcocA==", "dev": true, + "license": "MIT", "dependencies": { "is-core-module": "^2.13.0", "path-parse": "^1.0.7", @@ -2531,6 +2546,7 @@ "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", "dev": true, + "license": "ISC", "bin": { "semver": "bin/semver.js" } @@ -2924,6 +2940,7 @@ "resolved": "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz", "integrity": "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==", "dev": true, + "license": "MIT", "dependencies": { "array-union": "^2.1.0", "dir-glob": "^3.0.1", @@ -3625,18 +3642,6 @@ "loose-envify": "cli.js" } }, - "node_modules/lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "dev": true, - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=10" - } - }, "node_modules/lz-string": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz", @@ -3999,6 +4004,7 @@ "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" } @@ -4189,10 +4195,11 @@ } }, "node_modules/prettier": { - "version": "3.2.5", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.2.5.tgz", - "integrity": "sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A==", + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.3.0.tgz", + "integrity": "sha512-J9odKxERhCQ10OC2yb93583f6UnYutOeiV5i0zEDS7UGTdUt0u+y8erxl3lBKvwo/JHyyoEdXjwp4dke9oyZ/g==", "dev": true, + "license": "MIT", "bin": { "prettier": "bin/prettier.cjs" }, @@ -4503,13 +4510,11 @@ } }, "node_modules/semver": { - "version": "7.6.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", - "integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==", + "version": "7.6.2", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.2.tgz", + "integrity": "sha512-FNAIBWCx9qcRhoHcgcJ0gvU7SN1lYU2ZXuSfl04bSC5OpvDHFyJCjdNHomPXxjQlCBU67YW64PzY7/VIEH7F2w==", "dev": true, - "dependencies": { - "lru-cache": "^6.0.0" - }, + "license": "ISC", "bin": { "semver": "bin/semver.js" }, @@ -4593,6 +4598,7 @@ "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", "integrity": 
"sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" } @@ -5084,10 +5090,11 @@ "dev": true }, "node_modules/vite": { - "version": "5.2.11", - "resolved": "https://registry.npmjs.org/vite/-/vite-5.2.11.tgz", - "integrity": "sha512-HndV31LWW05i1BLPMUCE1B9E9GFbOu1MbenhS58FuK6owSO5qHm7GiCotrNY1YE5rMeQSFBGmT5ZaLEjFizgiQ==", + "version": "5.2.12", + "resolved": "https://registry.npmjs.org/vite/-/vite-5.2.12.tgz", + "integrity": "sha512-/gC8GxzxMK5ntBwb48pR32GGhENnjtY30G4A0jemunsBkiEZFw60s8InGpN8gkhHEkjnRK1aSAxeQgwvFhUHAA==", "dev": true, + "license": "MIT", "dependencies": { "esbuild": "^0.20.1", "postcss": "^8.4.38", @@ -5238,12 +5245,6 @@ "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "dev": true }, - "node_modules/yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", - "dev": true - }, "node_modules/yaml": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.1.tgz", From bf5b62edaccdc007dfcf559ab9e870be66c19877 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 3 Jun 2024 18:23:50 +0530 Subject: [PATCH 20/25] Maintain synchronicity between the lexer and the parser (#11457) ## Summary This PR updates the entire parser stack in multiple ways: ### Make the lexer lazy * https://github.com/astral-sh/ruff/pull/11244 * https://github.com/astral-sh/ruff/pull/11473 Previously, Ruff's lexer would act as an iterator. The parser would collect all the tokens in a vector first and then process the tokens to create the syntax tree. The first task in this project is to update the entire parsing flow to make the lexer lazy. This includes the `Lexer`, `TokenSource`, and `Parser`. For context, the `TokenSource` is a wrapper around the `Lexer` to filter out the trivia tokens[^1]. Now, the parser will ask the token source to get the next token and only then the lexer will continue and emit the token. This means that the lexer needs to be aware of the "current" token. When the `next_token` is called, the current token will be updated with the newly lexed token. The main motivation to make the lexer lazy is to allow re-lexing a token in a different context. This is going to be really useful to make the parser error resilience. For example, currently the emitted tokens remains the same even if the parser can recover from an unclosed parenthesis. This is important because the lexer emits a `NonLogicalNewline` in parenthesized context while a normal `Newline` in non-parenthesized context. This different kinds of newline is also used to emit the indentation tokens which is important for the parser as it's used to determine the start and end of a block. Additionally, this allows us to implement the following functionalities: 1. Checkpoint - rewind infrastructure: The idea here is to create a checkpoint and continue lexing. At a later point, this checkpoint can be used to rewind the lexer back to the provided checkpoint. 2. Remove the `SoftKeywordTransformer` and instead use lookahead or speculative parsing to determine whether a soft keyword is a keyword or an identifier 3. Remove the `Tok` enum. The `Tok` enum represents the tokens emitted by the lexer but it contains owned data which makes it expensive to clone. 
   The new `TokenKind` enum just represents the type of the token, which is
   very cheap to clone. This raises the question of how the parser will get
   the owned value that was previously stored on `Tok`. This is solved by
   introducing a new `TokenValue` enum, which contains only the subset of
   token kinds that carry an owned value. It is stored on the lexer and
   requested by the parser when it wants to process the data. For example:
   https://github.com/astral-sh/ruff/blob/8196720f809380d8f1fc7651679ff3fc2cb58cd7/crates/ruff_python_parser/src/parser/expression.rs#L1260-L1262

[^1]: Trivia tokens are `NonLogicalNewline` and `Comment`

### Remove `SoftKeywordTransformer`

* https://github.com/astral-sh/ruff/pull/11441
* https://github.com/astral-sh/ruff/pull/11459
* https://github.com/astral-sh/ruff/pull/11442
* https://github.com/astral-sh/ruff/pull/11443
* https://github.com/astral-sh/ruff/pull/11474

For context,
https://github.com/RustPython/RustPython/pull/4519/files#diff-5de40045e78e794aa5ab0b8aacf531aa477daf826d31ca129467703855408220
added support for soft keywords in the parser, using infinite lookahead to
classify a soft keyword as a keyword or an identifier. This is a brilliant
idea: it wraps the existing `Lexer` and works on top of it, which means that
the logic for lexing and re-lexing a soft keyword remains separate. The
change here is to remove `SoftKeywordTransformer` and let the parser make
this determination based on context, lookahead, and speculative parsing.

* **Context:** The transformer needs to know whether the lexer is at a
  statement position or a simple-statement position. This is because a
  `match` token starts a compound statement while a `type` token starts a
  simple statement. **The parser already knows this.**
* **Lookahead:** Now that the parser knows the context, it can perform a
  lookahead of up to two tokens to classify the soft keyword. The logic for
  this is described in the PRs implementing it for the `type` and `match`
  soft keywords.
* **Speculative parsing:** This is where the checkpoint-rewind infrastructure
  helps. For the `match` soft keyword, there are certain cases that we can't
  classify based on lookahead alone. The idea here is to create a checkpoint
  and keep parsing. Based on whether the parsing was successful and what
  tokens lie ahead, we can classify the remaining cases. Refer to #11443 for
  more details.

If the soft keyword is being parsed in an identifier context, it'll be
converted to an identifier and the emitted token will be updated as well.
Refer to
https://github.com/astral-sh/ruff/blob/8196720f809380d8f1fc7651679ff3fc2cb58cd7/crates/ruff_python_parser/src/parser/expression.rs#L487-L491.
The `case` soft keyword doesn't require any special handling because it's a
keyword only in the context of a match statement.

### Update the parser API

* https://github.com/astral-sh/ruff/pull/11494
* https://github.com/astral-sh/ruff/pull/11505

Now that the lexer is in sync with the parser, and the parser helps determine
whether a soft keyword is a keyword or an identifier, the lexer cannot be
used on its own, because it isn't sensitive to the context (which is
correct). This means that the parser API needs to be updated to disallow any
direct access to the lexer. Previously, there were multiple ways to parse the
source code:

1. Passing the source code itself
2. Passing the tokens

Now that the lexer and parser work together, the API corresponding to (2)
cannot exist.
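To give a feel for the new shape, here is a rough sketch assembled from the
call sites updated in this diff (illustrative only; the exact signatures live
in the PRs linked above):

```rust
use ruff_python_parser::parse_module;
use ruff_text_size::Ranged;

fn inspect(code: &str) {
    // Lexing and parsing now happen together behind one entry point; there
    // is no longer a way to hand the parser a pre-built token vector.
    let parsed = parse_module(code).expect("valid Python source");

    // The parsed output bundles the AST, the token stream, and the comment
    // ranges, so downstream tools no longer need to re-lex the source.
    for token in parsed.tokens().up_to_first_unknown() {
        println!("{:?} at {:?}", token.kind(), token.range());
    }
    println!(
        "{} top-level statements, {} comments",
        parsed.suite().len(),
        parsed.comment_ranges().iter().count(),
    );
}
```

A precomputed result can still be threaded through to consumers (see
`ParseSource::Precomputed(parsed)` in the linter benchmark below).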
The final API is described in this PR description:
https://github.com/astral-sh/ruff/pull/11494.

### Refactor the downstream tools (linter and formatter)

* https://github.com/astral-sh/ruff/pull/11511
* https://github.com/astral-sh/ruff/pull/11515
* https://github.com/astral-sh/ruff/pull/11529
* https://github.com/astral-sh/ruff/pull/11562
* https://github.com/astral-sh/ruff/pull/11592

And the final set of changes involves updating all references to the lexer
and the `Tok` enum. This was done in two parts:

1. Update all the references in a way that doesn't require any changes from
   this PR, i.e., it can be done independently
   * https://github.com/astral-sh/ruff/pull/11402
   * https://github.com/astral-sh/ruff/pull/11406
   * https://github.com/astral-sh/ruff/pull/11418
   * https://github.com/astral-sh/ruff/pull/11419
   * https://github.com/astral-sh/ruff/pull/11420
   * https://github.com/astral-sh/ruff/pull/11424
2. Update all the remaining references to use the changes made in this PR

For (2), various strategies were used:

1. Introduce a new `Tokens` struct that wraps the token vector and adds
   methods to query certain subsets of tokens. These include:
   1. `up_to_first_unknown`, which replaces the `tokenize` function
   2. `in_range` and `after`, which replace the `lex_starts_at` function; the
      former returns the tokens within the given range, while the latter
      returns all the tokens after the given offset
2. Introduce a new `TokenFlags`, a set of flags for querying certain
   information from a token. Currently, this information is limited to
   string-type tokens, but it can be expanded to include other information in
   the future as needed. https://github.com/astral-sh/ruff/pull/11578
3. Move the `CommentRanges` to the parsed output, because this information is
   common to both the linter and the formatter. This removes the need for the
   `tokens_and_ranges` function.

## Test Plan

- [x] Update and verify the test snapshots
- [x] Make sure the entire test suite is passing
- [x] Make sure there are no changes in the ecosystem checks
- [x] Run the fuzzer on the parser
- [x] Run this change on dozens of open-source projects

### Running this change on dozens of open-source projects

Refer to the PR description for the list of open-source projects used for
testing. The following comparisons were made between `main` and this branch:

1. Compare the output of `--select=E999` (syntax errors)
2. Compare the output of the default rule selection
3. Compare the output of `--select=ALL`

**Conclusion: all outputs were the same.**

## What's next?

The next step is to introduce re-lexing logic and update the parser to feed
recovery information to the lexer so that it can emit the correct token. This
moves us one step closer to error resilience in the parser and gives Ruff the
ability to lint even when the source code contains syntax errors.
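Appendix: the token-walking sketch referenced under "Make the lexer lazy"
above. It mirrors the updated lexer benchmark in this diff, with error
handling reduced to a panic:

```rust
use ruff_python_parser::{lexer, Mode, TokenKind};

fn walk_tokens(code: &str) {
    // The lexer is no longer an iterator: the caller pulls one token at a
    // time, and the lexer keeps track of the "current" token internally.
    let mut lexer = lexer::lex(code, Mode::Module);
    loop {
        match lexer.next_token() {
            TokenKind::EndOfFile => break,
            TokenKind::Unknown => panic!("expected valid Python source"),
            _ => {}
        }
    }
}
```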
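And the checkpoint-rewind toy referenced in the summary. This is a minimal,
self-contained illustration of the technique only: none of these names belong
to the actual `ruff_python_parser` API, and the real lexer has to snapshot
considerably more state than a single position.

```rust
/// A saved position that the toy token source can rewind to.
#[derive(Clone, Copy)]
struct Checkpoint {
    position: usize,
}

struct ToyTokenSource<'a> {
    tokens: &'a [&'a str],
    position: usize,
}

impl<'a> ToyTokenSource<'a> {
    fn checkpoint(&self) -> Checkpoint {
        Checkpoint { position: self.position }
    }

    fn rewind(&mut self, checkpoint: Checkpoint) {
        self.position = checkpoint.position;
    }

    fn next_token(&mut self) -> Option<&'a str> {
        let token = self.tokens.get(self.position).copied();
        if token.is_some() {
            self.position += 1;
        }
        token
    }
}

fn main() {
    // `match (x)` is ambiguous up front: `match` could start a match
    // statement or be a plain identifier being called.
    let mut source = ToyTokenSource {
        tokens: &["match", "(", "x", ")", ":"],
        position: 0,
    };

    // Speculate: take a checkpoint and consume a few tokens...
    let checkpoint = source.checkpoint();
    let _ = source.next_token();
    let _ = source.next_token();

    // ...then decide the speculation failed, rewind, and reparse from the
    // `match` token under the other interpretation.
    source.rewind(checkpoint);
    assert_eq!(source.next_token(), Some("match"));
}
```

The parser applies the same move to the ambiguous `match` cases: speculate
down one interpretation, and rewind to the checkpoint if it doesn't pan out.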
--- Cargo.lock | 7 +- crates/red_knot/src/parse.rs | 22 +- crates/ruff_benchmark/Cargo.toml | 1 - crates/ruff_benchmark/benches/formatter.rs | 27 +- crates/ruff_benchmark/benches/lexer.rs | 14 +- crates/ruff_benchmark/benches/linter.rs | 14 +- crates/ruff_benchmark/benches/parser.rs | 6 +- crates/ruff_dev/Cargo.toml | 1 + crates/ruff_dev/src/print_ast.rs | 2 +- crates/ruff_dev/src/print_tokens.rs | 13 +- .../src/checkers/ast/analyze/expression.rs | 2 +- .../src/checkers/ast/analyze/statement.rs | 2 +- crates/ruff_linter/src/checkers/ast/mod.rs | 26 +- crates/ruff_linter/src/checkers/filesystem.rs | 6 +- crates/ruff_linter/src/checkers/imports.rs | 10 +- .../ruff_linter/src/checkers/logical_lines.rs | 5 +- .../src/checkers/physical_lines.rs | 16 +- crates/ruff_linter/src/checkers/tokens.rs | 55 +- crates/ruff_linter/src/directives.rs | 112 +- crates/ruff_linter/src/doc_lines.rs | 19 +- crates/ruff_linter/src/fix/edits.rs | 43 +- crates/ruff_linter/src/importer/insertion.rs | 56 +- crates/ruff_linter/src/importer/mod.rs | 22 +- crates/ruff_linter/src/linter.rs | 196 +-- .../src/rules/eradicate/detection.rs | 4 +- .../eradicate/rules/commented_out_code.rs | 6 +- .../rules/zip_without_explicit_strict.rs | 2 +- .../flake8_commas/rules/trailing_commas.rs | 44 +- .../rules/unnecessary_generator_list.rs | 2 +- .../src/rules/flake8_executable/rules/mod.rs | 6 +- .../rules/implicit.rs | 32 +- .../rules/implicit_namespace_package.rs | 7 +- .../rules/unnecessary_dict_kwargs.rs | 2 +- .../rules/generic_not_last_base_class.rs | 2 +- .../flake8_pyi/rules/type_comment_in_stub.rs | 6 +- .../flake8_pytest_style/rules/assertion.rs | 8 +- .../flake8_pytest_style/rules/parametrize.rs | 10 +- .../flake8_simplify/rules/ast_bool_op.rs | 6 +- .../rules/flake8_simplify/rules/ast_ifexp.rs | 2 +- .../rules/flake8_simplify/rules/ast_with.rs | 2 +- .../flake8_simplify/rules/collapsible_if.rs | 12 +- .../if_else_block_instead_of_dict_get.rs | 4 +- .../rules/if_else_block_instead_of_if_exp.rs | 2 +- .../rules/if_with_same_arms.rs | 13 +- .../flake8_simplify/rules/key_in_dict.rs | 4 +- .../flake8_simplify/rules/needless_bool.rs | 2 +- .../rules/suppressible_exception.rs | 2 +- .../src/rules/flake8_todos/rules/todos.rs | 11 +- .../rules/typing_only_runtime_import.rs | 1 - .../ruff_linter/src/rules/isort/annotate.rs | 7 +- .../ruff_linter/src/rules/isort/comments.rs | 7 +- crates/ruff_linter/src/rules/isort/helpers.rs | 38 +- crates/ruff_linter/src/rules/isort/mod.rs | 4 +- .../rules/isort/rules/add_required_imports.rs | 22 +- .../src/rules/isort/rules/organize_imports.rs | 9 +- .../mccabe/rules/function_is_too_complex.rs | 7 +- .../pandas_vet/rules/inplace_argument.rs | 2 +- .../src/rules/pycodestyle/overlong.rs | 11 +- .../rules/pycodestyle/rules/blank_lines.rs | 117 +- .../pycodestyle/rules/compound_statements.rs | 116 +- .../pycodestyle/rules/doc_line_too_long.rs | 6 +- .../rules/pycodestyle/rules/line_too_long.rs | 6 +- .../pycodestyle/rules/literal_comparisons.rs | 2 +- .../pycodestyle/rules/logical_lines/mod.rs | 20 +- .../src/rules/pycodestyle/rules/not_tests.rs | 4 +- .../rules/too_many_newlines_at_end_of_file.rs | 17 +- crates/ruff_linter/src/rules/pyflakes/mod.rs | 15 +- .../rules/invalid_literal_comparisons.rs | 137 +- .../src/rules/pyflakes/rules/repeated_keys.rs | 8 +- .../rules/pyflakes/rules/unused_variable.rs | 169 +- .../pygrep_hooks/rules/blanket_type_ignore.rs | 6 +- .../src/rules/pylint/rules/empty_comment.rs | 9 +- .../src/rules/pylint/rules/if_stmt_min_max.rs | 2 +- 
.../src/rules/pylint/rules/nested_min_max.rs | 2 +- .../rules/subprocess_run_without_check.rs | 2 +- .../rules/pylint/rules/too_many_branches.rs | 6 +- .../rules/too_many_return_statements.rs | 6 +- .../rules/pylint/rules/too_many_statements.rs | 8 +- .../pylint/rules/unspecified_encoding.rs | 4 +- .../ruff_linter/src/rules/pyupgrade/fixes.rs | 114 +- .../pyupgrade/rules/deprecated_import.rs | 54 +- .../pyupgrade/rules/extraneous_parentheses.rs | 57 +- .../src/rules/pyupgrade/rules/f_strings.rs | 30 +- .../rules/printf_string_formatting.rs | 93 +- .../pyupgrade/rules/redundant_open_modes.rs | 73 +- .../rules/unnecessary_coding_comment.rs | 4 +- .../rules/unnecessary_encode_utf8.rs | 33 +- .../pyupgrade/rules/yield_in_for_loop.rs | 2 +- .../rules/if_exp_instead_of_or_operator.rs | 20 +- .../src/rules/refurb/rules/repeated_append.rs | 2 +- .../rules/single_item_membership_test.rs | 2 +- .../rules/collection_literal_concatenation.rs | 2 +- .../invalid_formatter_suppression_comment.rs | 4 +- .../ruff/rules/missing_fstring_syntax.rs | 6 +- .../rules/parenthesize_logical_operators.rs | 2 +- .../ruff/rules/quadratic_list_summation.rs | 2 +- .../src/rules/ruff/rules/sequence_sorting.rs | 38 +- .../src/rules/ruff/rules/sort_dunder_all.rs | 1 + .../src/rules/ruff/rules/sort_dunder_slots.rs | 1 + .../src/rules/ruff/rules/test_rules.rs | 40 +- .../rules/ruff/rules/unnecessary_key_check.rs | 4 +- crates/ruff_linter/src/test.rs | 25 +- crates/ruff_python_ast/src/str_prefix.rs | 39 - .../tests/identifier.rs | 4 +- .../tests/parenthesize.rs | 32 +- .../tests/preorder.rs | 8 +- .../tests/stmt_if.rs | 30 +- .../tests/visitor.rs | 8 +- crates/ruff_python_codegen/Cargo.toml | 1 + crates/ruff_python_codegen/src/generator.rs | 14 +- crates/ruff_python_codegen/src/lib.rs | 9 +- crates/ruff_python_codegen/src/stylist.rs | 189 +-- crates/ruff_python_formatter/Cargo.toml | 1 - crates/ruff_python_formatter/src/cli.rs | 16 +- .../ruff_python_formatter/src/comments/mod.rs | 22 +- crates/ruff_python_formatter/src/context.rs | 14 +- .../src/expression/expr_name.rs | 8 +- .../src/expression/parentheses.rs | 7 +- crates/ruff_python_formatter/src/lib.rs | 29 +- crates/ruff_python_formatter/src/range.rs | 32 +- .../src/statement/suite.rs | 11 +- .../src/string/docstring.rs | 11 +- crates/ruff_python_formatter/src/verbatim.rs | 54 +- .../ruff_python_formatter/tests/fixtures.rs | 8 +- .../ruff_python_index/src/comment_ranges.rs | 44 - .../ruff_python_index/src/fstring_ranges.rs | 16 +- crates/ruff_python_index/src/indexer.rs | 84 +- crates/ruff_python_index/src/lib.rs | 2 - .../ruff_python_index/src/multiline_ranges.rs | 13 +- crates/ruff_python_parser/Cargo.toml | 1 + .../inline/err/async_unexpected_token.py | 2 - .../inline/err/match_classify_as_keyword.py | 2 + ...match_classify_as_keyword_or_identifier.py | 2 + .../inline/err/match_expected_colon.py | 2 + .../inline/err/match_stmt_missing_pattern.py | 1 - .../ok/except_stmt_as_name_soft_keyword.py | 4 + .../from_import_soft_keyword_module_name.py | 4 + .../inline/ok/import_as_name_soft_keyword.py | 3 + .../ok/match_as_pattern_soft_keyword.py | 4 + .../ok/match_attr_pattern_soft_keyword.py | 5 + .../ok/match_classify_as_identifier_1.py | 1 + .../ok/match_classify_as_identifier_2.py | 13 + .../inline/ok/match_classify_as_keyword_1.py | 24 + .../inline/ok/match_classify_as_keyword_2.py | 12 + ...match_classify_as_keyword_or_identifier.py | 10 + crates/ruff_python_parser/src/error.rs | 4 +- crates/ruff_python_parser/src/lexer.rs | 1414 ++++++++++------- 
crates/ruff_python_parser/src/lexer/cursor.rs | 41 +- .../ruff_python_parser/src/lexer/fstring.rs | 27 +- .../src/lexer/indentation.rs | 15 +- crates/ruff_python_parser/src/lib.rs | 735 ++++++--- .../src/parser/expression.rs | 122 +- crates/ruff_python_parser/src/parser/mod.rs | 350 ++-- .../ruff_python_parser/src/parser/pattern.rs | 106 +- .../src/parser/statement.rs | 353 +++- crates/ruff_python_parser/src/parser/tests.rs | 30 +- ...thon_parser__lexer__tests__assignment.snap | 27 +- ...__lexer__tests__comment_until_mac_eol.snap | 19 +- ..._lexer__tests__comment_until_unix_eol.snap | 19 +- ...xer__tests__comment_until_windows_eol.snap | 19 +- ...lexer__tests__dedent_after_whitespace.snap | 79 + ...er__tests__double_dedent_with_mac_eol.snap | 21 +- ...ests__double_dedent_with_tabs_mac_eol.snap | 21 +- ...sts__double_dedent_with_tabs_unix_eol.snap | 21 +- ...__double_dedent_with_tabs_windows_eol.snap | 21 +- ...r__tests__double_dedent_with_unix_eol.snap | 21 +- ...tests__double_dedent_with_windows_eol.snap | 21 +- ...arser__lexer__tests__emoji_identifier.snap | 24 + ..._parser__lexer__tests__empty_fstrings.snap | 112 +- ...__tests__empty_ipython_escape_command.snap | 3 + ...er__lexer__tests__escape_unicode_name.snap | 19 +- ..._python_parser__lexer__tests__fstring.snap | 101 +- ...arser__lexer__tests__fstring_comments.snap | 61 +- ...ser__lexer__tests__fstring_conversion.snap | 113 +- ..._parser__lexer__tests__fstring_escape.snap | 79 +- ...__lexer__tests__fstring_escape_braces.snap | 143 +- ...ser__lexer__tests__fstring_escape_raw.snap | 87 +- ...__tests__fstring_expression_multiline.snap | 63 +- ...rser__lexer__tests__fstring_multiline.snap | 153 +- ...__lexer__tests__fstring_named_unicode.snap | 35 +- ...xer__tests__fstring_named_unicode_raw.snap | 63 +- ..._parser__lexer__tests__fstring_nested.snap | 223 ++- ...er__lexer__tests__fstring_parentheses.snap | 227 ++- ..._parser__lexer__tests__fstring_prefix.snap | 179 +-- ...__fstring_single_quote_escape_mac_eol.snap | 35 +- ..._fstring_single_quote_escape_unix_eol.snap | 35 +- ...tring_single_quote_escape_windows_eol.snap | 35 +- ...exer__tests__fstring_with_format_spec.snap | 220 ++- ...ests__fstring_with_ipy_escape_command.snap | 57 +- ...tests__fstring_with_lambda_expression.snap | 59 +- ...s__fstring_with_multiline_format_spec.snap | 289 ++-- ..._tests__fstring_with_named_expression.snap | 131 +- ...__lexer__tests__fstring_with_nul_char.snap | 35 +- ...exer__tests__indentation_with_mac_eol.snap | 15 +- ...xer__tests__indentation_with_unix_eol.snap | 15 +- ...__tests__indentation_with_windows_eol.snap | 15 +- ...exer__tests__invalid_leading_zero_big.snap | 22 +- ...er__tests__invalid_leading_zero_small.snap | 22 +- ..._lexer__tests__ipython_escape_command.snap | 3 + ...ts__ipython_escape_command_assignment.snap | 27 +- ...s__ipython_escape_command_indentation.snap | 3 + ...ape_command_line_continuation_mac_eol.snap | 3 + ...pe_command_line_continuation_unix_eol.snap | 3 + ...command_line_continuation_windows_eol.snap | 3 + ...ine_continuation_with_mac_eol_and_eof.snap | 3 + ...ne_continuation_with_unix_eol_and_eof.snap | 3 + ...continuation_with_windows_eol_and_eof.snap | 3 + ...ests__ipython_help_end_escape_command.snap | 3 + ...ser__lexer__tests__line_comment_empty.snap | 13 +- ...rser__lexer__tests__line_comment_long.snap | 13 +- ...tests__line_comment_single_whitespace.snap | 13 +- ...lexer__tests__line_comment_whitespace.snap | 13 +- ...__tests__logical_newline_line_comment.snap | 11 +- ..._tests__match_softkeyword_in_notebook.snap 
| 15 +- ...r__tests__newline_in_brackets_mac_eol.snap | 51 +- ...__tests__newline_in_brackets_unix_eol.snap | 51 +- ...ests__newline_in_brackets_windows_eol.snap | 51 +- ...ogical_newline_in_string_continuation.snap | 55 +- ..._python_parser__lexer__tests__numbers.snap | 75 +- ...ython_parser__lexer__tests__operators.snap | 3 + ...f_python_parser__lexer__tests__string.snap | 131 +- ...sts__string_continuation_with_mac_eol.snap | 19 +- ...ts__string_continuation_with_unix_eol.snap | 19 +- ..._string_continuation_with_windows_eol.snap | 19 +- ...ser__lexer__tests__tet_too_low_dedent.snap | 94 +- ...__lexer__tests__triple_quoted_mac_eol.snap | 19 +- ..._lexer__tests__triple_quoted_unix_eol.snap | 19 +- ...xer__tests__triple_quoted_windows_eol.snap | 19 +- .../ruff_python_parser/src/soft_keywords.rs | 224 --- crates/ruff_python_parser/src/string.rs | 192 ++- crates/ruff_python_parser/src/token.rs | 634 ++------ crates/ruff_python_parser/src/token_source.rs | 232 ++- crates/ruff_python_parser/src/typing.rs | 19 +- crates/ruff_python_parser/tests/fixtures.rs | 34 +- ..._assign_stmt_type_alias_annotation.py.snap | 14 - ...alid_syntax@async_unexpected_token.py.snap | 133 +- ...d_syntax@match_classify_as_keyword.py.snap | 66 + ..._classify_as_keyword_or_identifier.py.snap | 65 + ...nvalid_syntax@match_expected_colon.py.snap | 76 + ..._syntax@match_stmt_missing_pattern.py.snap | 67 +- ...@match_stmt_no_newline_before_case.py.snap | 60 +- ...ax@statements__match__as_pattern_4.py.snap | 12 +- ...x@except_stmt_as_name_soft_keyword.py.snap | 133 ++ ...om_import_soft_keyword_module_name.py.snap | 103 ++ ...syntax@import_as_name_soft_keyword.py.snap | 75 + ...ntax@match_as_pattern_soft_keyword.py.snap | 113 ++ ...ax@match_attr_pattern_soft_keyword.py.snap | 231 +++ ...tax@match_classify_as_identifier_1.py.snap | 44 + ...tax@match_classify_as_identifier_2.py.snap | 319 ++++ ...syntax@match_classify_as_keyword_1.py.snap | 578 +++++++ ...syntax@match_classify_as_keyword_2.py.snap | 233 +++ ..._classify_as_keyword_or_identifier.py.snap | 291 ++++ .../src/analyze/type_inference.rs | 48 +- .../Cargo.toml | 1 - .../tests/block_comments.rs | 38 +- .../tests/simple_tokenizer.rs | 16 +- .../tests/whitespace.rs | 22 +- crates/ruff_server/src/lint.rs | 17 +- crates/ruff_wasm/Cargo.toml | 1 - crates/ruff_wasm/src/lib.rs | 55 +- fuzz/Cargo.toml | 1 + fuzz/fuzz_targets/ruff_parse_simple.rs | 51 +- 262 files changed, 8098 insertions(+), 6056 deletions(-) delete mode 100644 crates/ruff_python_index/src/comment_ranges.rs create mode 100644 crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py create mode 100644 crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py create mode 100644 crates/ruff_python_parser/resources/inline/err/match_expected_colon.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py create mode 100644 
crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__dedent_after_whitespace.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__emoji_identifier.snap delete mode 100644 crates/ruff_python_parser/src/soft_keywords.rs create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword_or_identifier.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_expected_colon.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@except_stmt_as_name_soft_keyword.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@from_import_soft_keyword_module_name.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@import_as_name_soft_keyword.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@match_as_pattern_soft_keyword.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@match_attr_pattern_soft_keyword.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_1.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_2.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_1.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_2.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_or_identifier.py.snap diff --git a/Cargo.lock b/Cargo.lock index 0a8a6690d2e42..f65270364507b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1960,7 +1960,6 @@ dependencies = [ "ruff_linter", "ruff_python_ast", "ruff_python_formatter", - "ruff_python_index", "ruff_python_parser", "serde", "serde_json", @@ -2008,6 +2007,7 @@ dependencies = [ "ruff_python_parser", "ruff_python_stdlib", "ruff_python_trivia", + "ruff_text_size", "ruff_workspace", "schemars", "serde", @@ -2184,6 +2184,7 @@ dependencies = [ "ruff_python_literal", "ruff_python_parser", "ruff_source_file", + "ruff_text_size", ] [[package]] @@ -2202,7 +2203,6 @@ dependencies = [ "ruff_formatter", "ruff_macros", "ruff_python_ast", - "ruff_python_index", "ruff_python_parser", "ruff_python_trivia", "ruff_source_file", @@ -2253,6 +2253,7 @@ dependencies = [ "itertools 0.13.0", "memchr", "ruff_python_ast", + "ruff_python_trivia", "ruff_source_file", "ruff_text_size", "rustc-hash", @@ -2310,7 +2311,6 @@ name = "ruff_python_trivia_integration_tests" version = "0.0.0" dependencies = [ "insta", - "ruff_python_index", "ruff_python_parser", "ruff_python_trivia", "ruff_source_file", @@ -2385,7 +2385,6 @@ dependencies = [ "ruff_python_formatter", "ruff_python_index", "ruff_python_parser", - "ruff_python_trivia", "ruff_source_file", "ruff_text_size", "ruff_workspace", diff --git a/crates/red_knot/src/parse.rs b/crates/red_knot/src/parse.rs index 6856315dcb494..4e3cd4d422119 100644 --- a/crates/red_knot/src/parse.rs +++ b/crates/red_knot/src/parse.rs 
@@ -32,17 +32,19 @@ impl Parsed { let result = ruff_python_parser::parse(text, Mode::Module); let (module, errors) = match result { - Ok(ast::Mod::Module(module)) => (module, vec![]), - Ok(ast::Mod::Expression(expression)) => ( - ast::ModModule { - range: expression.range(), - body: vec![ast::Stmt::Expr(ast::StmtExpr { + Ok(parsed) => match parsed.into_syntax() { + ast::Mod::Module(module) => (module, vec![]), + ast::Mod::Expression(expression) => ( + ast::ModModule { range: expression.range(), - value: expression.body, - })], - }, - vec![], - ), + body: vec![ast::Stmt::Expr(ast::StmtExpr { + range: expression.range(), + value: expression.body, + })], + }, + vec![], + ), + }, Err(errors) => ( ast::ModModule { range: TextRange::default(), diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index d631472f23e4f..e8ff3a09b811c 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -44,7 +44,6 @@ codspeed-criterion-compat = { workspace = true, default-features = false, option ruff_linter = { workspace = true } ruff_python_ast = { workspace = true } ruff_python_formatter = { workspace = true } -ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } [lints] diff --git a/crates/ruff_benchmark/benches/formatter.rs b/crates/ruff_benchmark/benches/formatter.rs index 98c3a97f2c956..cb6db8608fc4d 100644 --- a/crates/ruff_benchmark/benches/formatter.rs +++ b/crates/ruff_benchmark/benches/formatter.rs @@ -5,9 +5,7 @@ use ruff_benchmark::criterion::{ }; use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError}; use ruff_python_formatter::{format_module_ast, PreviewMode, PyFormatOptions}; -use ruff_python_index::CommentRangesBuilder; -use ruff_python_parser::lexer::lex; -use ruff_python_parser::{allocate_tokens_vec, parse_tokens, Mode}; +use ruff_python_parser::{parse, Mode}; #[cfg(target_os = "windows")] #[global_allocator] @@ -52,28 +50,15 @@ fn benchmark_formatter(criterion: &mut Criterion) { BenchmarkId::from_parameter(case.name()), &case, |b, case| { - let mut tokens = allocate_tokens_vec(case.code()); - let mut comment_ranges = CommentRangesBuilder::default(); - - for result in lex(case.code(), Mode::Module) { - let (token, range) = result.expect("Input to be a valid python program."); - - comment_ranges.visit_token(&token, range); - tokens.push(Ok((token, range))); - } - - let comment_ranges = comment_ranges.finish(); - - // Parse the AST. - let module = parse_tokens(tokens, case.code(), Mode::Module) - .expect("Input to be a valid python program"); + // Parse the source. 
+ let parsed = + parse(case.code(), Mode::Module).expect("Input should be a valid Python code"); b.iter(|| { let options = PyFormatOptions::from_extension(Path::new(case.name())) .with_preview(PreviewMode::Enabled); - let formatted = - format_module_ast(&module, &comment_ranges, case.code(), options) - .expect("Formatting to succeed"); + let formatted = format_module_ast(&parsed, case.code(), options) + .expect("Formatting to succeed"); formatted.print().expect("Printing to succeed") }); diff --git a/crates/ruff_benchmark/benches/lexer.rs b/crates/ruff_benchmark/benches/lexer.rs index c31cb84b5e4f2..64b68a7a3539a 100644 --- a/crates/ruff_benchmark/benches/lexer.rs +++ b/crates/ruff_benchmark/benches/lexer.rs @@ -2,7 +2,7 @@ use ruff_benchmark::criterion::{ criterion_group, criterion_main, measurement::WallTime, BenchmarkId, Criterion, Throughput, }; use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError}; -use ruff_python_parser::{lexer, Mode}; +use ruff_python_parser::{lexer, Mode, TokenKind}; #[cfg(target_os = "windows")] #[global_allocator] @@ -47,9 +47,15 @@ fn benchmark_lexer(criterion: &mut Criterion) { &case, |b, case| { b.iter(|| { - let result = - lexer::lex(case.code(), Mode::Module).find(std::result::Result::is_err); - assert_eq!(result, None, "Input to be a valid Python program"); + let mut lexer = lexer::lex(case.code(), Mode::Module); + loop { + let token = lexer.next_token(); + match token { + TokenKind::EndOfFile => break, + TokenKind::Unknown => panic!("Input to be a valid Python source code"), + _ => {} + } + } }); }, ); diff --git a/crates/ruff_benchmark/benches/linter.rs b/crates/ruff_benchmark/benches/linter.rs index fcc1d7da42cfb..1301d9e7cc179 100644 --- a/crates/ruff_benchmark/benches/linter.rs +++ b/crates/ruff_benchmark/benches/linter.rs @@ -10,7 +10,7 @@ use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::source_kind::SourceKind; use ruff_linter::{registry::Rule, RuleSelector}; use ruff_python_ast::PySourceType; -use ruff_python_parser::{parse_program_tokens, tokenize, Mode}; +use ruff_python_parser::parse_module; #[cfg(target_os = "windows")] #[global_allocator] @@ -54,15 +54,13 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) { BenchmarkId::from_parameter(case.name()), &case, |b, case| { - // Tokenize the source. - let tokens = tokenize(case.code(), Mode::Module); - // Parse the source. 
- let ast = parse_program_tokens(tokens.clone(), case.code(), false).unwrap(); + let parsed = + parse_module(case.code()).expect("Input should be a valid Python code"); b.iter_batched( - || (ast.clone(), tokens.clone()), - |(ast, tokens)| { + || parsed.clone(), + |parsed| { let path = case.path(); let result = lint_only( &path, @@ -71,7 +69,7 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) { flags::Noqa::Enabled, &SourceKind::Python(case.code().to_string()), PySourceType::from(path.as_path()), - ParseSource::Precomputed { tokens, ast }, + ParseSource::Precomputed(parsed), ); // Assert that file contains no parse errors diff --git a/crates/ruff_benchmark/benches/parser.rs b/crates/ruff_benchmark/benches/parser.rs index 0aca2772f937a..ec2fa671c1df0 100644 --- a/crates/ruff_benchmark/benches/parser.rs +++ b/crates/ruff_benchmark/benches/parser.rs @@ -4,7 +4,7 @@ use ruff_benchmark::criterion::{ use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError}; use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor}; use ruff_python_ast::Stmt; -use ruff_python_parser::parse_suite; +use ruff_python_parser::parse_module; #[cfg(target_os = "windows")] #[global_allocator] @@ -60,7 +60,9 @@ fn benchmark_parser(criterion: &mut Criterion) { &case, |b, case| { b.iter(|| { - let parsed = parse_suite(case.code()).unwrap(); + let parsed = parse_module(case.code()) + .expect("Input should be a valid Python code") + .into_suite(); let mut visitor = CountVisitor { count: 0 }; visitor.visit_body(&parsed); diff --git a/crates/ruff_dev/Cargo.toml b/crates/ruff_dev/Cargo.toml index 632c12f473786..d5ccc937fd9c5 100644 --- a/crates/ruff_dev/Cargo.toml +++ b/crates/ruff_dev/Cargo.toml @@ -22,6 +22,7 @@ ruff_python_formatter = { workspace = true } ruff_python_parser = { workspace = true } ruff_python_stdlib = { workspace = true } ruff_python_trivia = { workspace = true } +ruff_text_size = { workspace = true } ruff_workspace = { workspace = true, features = ["schemars"] } anyhow = { workspace = true } diff --git a/crates/ruff_dev/src/print_ast.rs b/crates/ruff_dev/src/print_ast.rs index cb72d0403abaa..35206ca45ec13 100644 --- a/crates/ruff_dev/src/print_ast.rs +++ b/crates/ruff_dev/src/print_ast.rs @@ -24,7 +24,7 @@ pub(crate) fn main(args: &Args) -> Result<()> { args.file.display() ) })?; - let python_ast = parse(source_kind.source_code(), source_type.as_mode())?; + let python_ast = parse(source_kind.source_code(), source_type.as_mode())?.into_syntax(); println!("{python_ast:#?}"); Ok(()) } diff --git a/crates/ruff_dev/src/print_tokens.rs b/crates/ruff_dev/src/print_tokens.rs index a36f9a2c60f49..c767727fdd2b1 100644 --- a/crates/ruff_dev/src/print_tokens.rs +++ b/crates/ruff_dev/src/print_tokens.rs @@ -7,7 +7,8 @@ use anyhow::Result; use ruff_linter::source_kind::SourceKind; use ruff_python_ast::PySourceType; -use ruff_python_parser::{lexer, AsMode}; +use ruff_python_parser::parse_unchecked_source; +use ruff_text_size::Ranged; #[derive(clap::Args)] pub(crate) struct Args { @@ -24,11 +25,13 @@ pub(crate) fn main(args: &Args) -> Result<()> { args.file.display() ) })?; - for (tok, range) in lexer::lex(source_kind.source_code(), source_type.as_mode()).flatten() { + let parsed = parse_unchecked_source(source_kind.source_code(), source_type); + for token in parsed.tokens() { println!( - "{start:#?} {tok:#?} {end:#?}", - start = range.start(), - end = range.end() + "{start:#?} {kind:#?} {end:#?}", + start = token.start(), + end = token.end(), + kind = token.kind(), ); } Ok(()) 
diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 4407dadc219ea..9c12ac03339c9 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -1160,7 +1160,7 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { } } if checker.enabled(Rule::PrintfStringFormatting) { - pyupgrade::rules::printf_string_formatting(checker, expr, right); + pyupgrade::rules::printf_string_formatting(checker, bin_op, format_string); } if checker.enabled(Rule::BadStringFormatCharacter) { pylint::rules::bad_string_format_character::percent( diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs index 94419de40fbb0..70561392e7f71 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -765,7 +765,7 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { pyupgrade::rules::deprecated_c_element_tree(checker, stmt); } if checker.enabled(Rule::DeprecatedImport) { - pyupgrade::rules::deprecated_import(checker, stmt, names, module, level); + pyupgrade::rules::deprecated_import(checker, import_from); } if checker.enabled(Rule::UnnecessaryBuiltinImport) { if let Some(module) = module { diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index faaf41595aa3c..5f26244df7fff 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -32,8 +32,10 @@ use itertools::Itertools; use log::debug; use ruff_python_ast::{ self as ast, AnyParameterRef, Comprehension, ElifElseClause, ExceptHandler, Expr, ExprContext, - FStringElement, Keyword, MatchCase, Parameter, Parameters, Pattern, Stmt, Suite, UnaryOp, + FStringElement, Keyword, MatchCase, ModModule, Parameter, Parameters, Pattern, Stmt, Suite, + UnaryOp, }; +use ruff_python_parser::Parsed; use ruff_text_size::{Ranged, TextRange, TextSize}; use ruff_diagnostics::{Diagnostic, IsolationLevel}; @@ -174,6 +176,8 @@ impl ExpectedDocstringKind { } pub(crate) struct Checker<'a> { + /// The parsed [`Parsed`]. + parsed: &'a Parsed, /// The [`Path`] to the file under analysis. path: &'a Path, /// The [`Path`] to the package containing the current file. @@ -223,6 +227,7 @@ pub(crate) struct Checker<'a> { impl<'a> Checker<'a> { #[allow(clippy::too_many_arguments)] pub(crate) fn new( + parsed: &'a Parsed, settings: &'a LinterSettings, noqa_line_for: &'a NoqaMapping, noqa: flags::Noqa, @@ -232,12 +237,12 @@ impl<'a> Checker<'a> { locator: &'a Locator, stylist: &'a Stylist, indexer: &'a Indexer, - importer: Importer<'a>, source_type: PySourceType, cell_offsets: Option<&'a CellOffsets>, notebook_index: Option<&'a NotebookIndex>, ) -> Checker<'a> { Checker { + parsed, settings, noqa_line_for, noqa, @@ -248,7 +253,7 @@ impl<'a> Checker<'a> { locator, stylist, indexer, - importer, + importer: Importer::new(parsed, locator, stylist), semantic: SemanticModel::new(&settings.typing_modules, path, module), visit: deferred::Visit::default(), analyze: deferred::Analyze::default(), @@ -318,6 +323,11 @@ impl<'a> Checker<'a> { } } + /// The [`Parsed`] output for the current file, which contains the tokens, AST, and more. 
+ pub(crate) const fn parsed(&self) -> &'a Parsed { + self.parsed + } + /// The [`Locator`] for the current file, which enables extraction of source code from byte /// offsets. pub(crate) const fn locator(&self) -> &'a Locator<'a> { @@ -2326,7 +2336,7 @@ impl<'a> Checker<'a> { #[allow(clippy::too_many_arguments)] pub(crate) fn check_ast( - python_ast: &Suite, + parsed: &Parsed, locator: &Locator, stylist: &Stylist, indexer: &Indexer, @@ -2356,10 +2366,11 @@ pub(crate) fn check_ast( } else { ModuleSource::File(path) }, - python_ast, + python_ast: parsed.suite(), }; let mut checker = Checker::new( + parsed, settings, noqa_line_for, noqa, @@ -2369,7 +2380,6 @@ pub(crate) fn check_ast( locator, stylist, indexer, - Importer::new(python_ast, locator, stylist), source_type, cell_offsets, notebook_index, @@ -2377,8 +2387,8 @@ pub(crate) fn check_ast( checker.bind_builtins(); // Iterate over the AST. - checker.visit_module(python_ast); - checker.visit_body(python_ast); + checker.visit_module(parsed.suite()); + checker.visit_body(parsed.suite()); // Visit any deferred syntax nodes. Take care to visit in order, such that we avoid adding // new deferred nodes after visiting nodes of that kind. For example, visiting a deferred diff --git a/crates/ruff_linter/src/checkers/filesystem.rs b/crates/ruff_linter/src/checkers/filesystem.rs index 2d9a3431e6fb5..c71db50cb3563 100644 --- a/crates/ruff_linter/src/checkers/filesystem.rs +++ b/crates/ruff_linter/src/checkers/filesystem.rs @@ -1,7 +1,7 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use crate::registry::Rule; @@ -13,7 +13,7 @@ pub(crate) fn check_file_path( path: &Path, package: Option<&Path>, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, settings: &LinterSettings, ) -> Vec { let mut diagnostics: Vec = vec![]; @@ -24,7 +24,7 @@ pub(crate) fn check_file_path( path, package, locator, - indexer, + comment_ranges, &settings.project_root, &settings.src, ) { diff --git a/crates/ruff_linter/src/checkers/imports.rs b/crates/ruff_linter/src/checkers/imports.rs index 2bc19b74125b3..c2cc0fccb4170 100644 --- a/crates/ruff_linter/src/checkers/imports.rs +++ b/crates/ruff_linter/src/checkers/imports.rs @@ -4,9 +4,10 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; use ruff_notebook::CellOffsets; use ruff_python_ast::statement_visitor::StatementVisitor; -use ruff_python_ast::{PySourceType, Suite}; +use ruff_python_ast::{ModModule, PySourceType}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; +use ruff_python_parser::Parsed; use ruff_source_file::Locator; use crate::directives::IsortDirectives; @@ -17,7 +18,7 @@ use crate::settings::LinterSettings; #[allow(clippy::too_many_arguments)] pub(crate) fn check_imports( - python_ast: &Suite, + parsed: &Parsed, locator: &Locator, indexer: &Indexer, directives: &IsortDirectives, @@ -31,7 +32,7 @@ pub(crate) fn check_imports( let tracker = { let mut tracker = BlockBuilder::new(locator, directives, source_type.is_stub(), cell_offsets); - tracker.visit_body(python_ast); + tracker.visit_body(parsed.suite()); tracker }; @@ -50,6 +51,7 @@ pub(crate) fn check_imports( settings, package, source_type, + parsed, ) { diagnostics.push(diagnostic); } @@ -58,7 +60,7 @@ pub(crate) fn check_imports( } if settings.rules.enabled(Rule::MissingRequiredImport) { diagnostics.extend(isort::rules::add_required_imports( - python_ast, + parsed, locator, stylist, settings, diff --git 
a/crates/ruff_linter/src/checkers/logical_lines.rs b/crates/ruff_linter/src/checkers/logical_lines.rs index 4044e6c18a67b..ef9a7a8dae8e5 100644 --- a/crates/ruff_linter/src/checkers/logical_lines.rs +++ b/crates/ruff_linter/src/checkers/logical_lines.rs @@ -2,8 +2,7 @@ use crate::line_width::IndentWidth; use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::TokenKind; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -34,7 +33,7 @@ pub(crate) fn expand_indent(line: &str, indent_width: IndentWidth) -> usize { } pub(crate) fn check_logical_lines( - tokens: &[LexResult], + tokens: &Tokens, locator: &Locator, indexer: &Indexer, stylist: &Stylist, diff --git a/crates/ruff_linter/src/checkers/physical_lines.rs b/crates/ruff_linter/src/checkers/physical_lines.rs index fbb9abff633e3..938c6be6e4dc2 100644 --- a/crates/ruff_linter/src/checkers/physical_lines.rs +++ b/crates/ruff_linter/src/checkers/physical_lines.rs @@ -3,6 +3,7 @@ use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::TextSize; @@ -19,6 +20,7 @@ pub(crate) fn check_physical_lines( locator: &Locator, stylist: &Stylist, indexer: &Indexer, + comment_ranges: &CommentRanges, doc_lines: &[TextSize], settings: &LinterSettings, ) -> Vec { @@ -42,7 +44,7 @@ pub(crate) fn check_physical_lines( .is_some() { if enforce_doc_line_too_long { - if let Some(diagnostic) = doc_line_too_long(&line, indexer, settings) { + if let Some(diagnostic) = doc_line_too_long(&line, comment_ranges, settings) { diagnostics.push(diagnostic); } } @@ -55,7 +57,7 @@ pub(crate) fn check_physical_lines( } if enforce_line_too_long { - if let Some(diagnostic) = line_too_long(&line, indexer, settings) { + if let Some(diagnostic) = line_too_long(&line, comment_ranges, settings) { diagnostics.push(diagnostic); } } @@ -90,8 +92,7 @@ pub(crate) fn check_physical_lines( mod tests { use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; - use ruff_python_parser::lexer::lex; - use ruff_python_parser::Mode; + use ruff_python_parser::parse_module; use ruff_source_file::Locator; use crate::line_width::LineLength; @@ -105,15 +106,16 @@ mod tests { fn e501_non_ascii_char() { let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8. 
let locator = Locator::new(line); - let tokens: Vec<_> = lex(line, Mode::Module).collect(); - let indexer = Indexer::from_tokens(&tokens, &locator); - let stylist = Stylist::from_tokens(&tokens, &locator); + let parsed = parse_module(line).unwrap(); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let check_with_max_line_length = |line_length: LineLength| { check_physical_lines( &locator, &stylist, &indexer, + parsed.comment_ranges(), &[], &LinterSettings { pycodestyle: pycodestyle::settings::Settings { diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index 3f6e430f01391..0c59df78576bf 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -3,15 +3,16 @@ use std::path::Path; use ruff_notebook::CellOffsets; -use ruff_python_ast::PySourceType; +use ruff_python_ast::{ModModule, PySourceType}; use ruff_python_codegen::Stylist; use ruff_diagnostics::Diagnostic; use ruff_python_index::Indexer; +use ruff_python_parser::Parsed; use ruff_source_file::Locator; +use ruff_text_size::Ranged; use crate::directives::TodoComment; -use crate::linter::TokenSource; use crate::registry::{AsRule, Rule}; use crate::rules::pycodestyle::rules::BlankLinesChecker; use crate::rules::{ @@ -22,7 +23,7 @@ use crate::settings::LinterSettings; #[allow(clippy::too_many_arguments)] pub(crate) fn check_tokens( - tokens: &TokenSource, + parsed: &Parsed, path: &Path, locator: &Locator, indexer: &Indexer, @@ -33,6 +34,9 @@ pub(crate) fn check_tokens( ) -> Vec { let mut diagnostics: Vec = vec![]; + let tokens = parsed.tokens(); + let comment_ranges = parsed.comment_ranges(); + if settings.rules.any_enabled(&[ Rule::BlankLineBetweenMethods, Rule::BlankLinesTopLevel, @@ -42,22 +46,22 @@ pub(crate) fn check_tokens( Rule::BlankLinesBeforeNestedDefinition, ]) { BlankLinesChecker::new(locator, stylist, settings, source_type, cell_offsets) - .check_lines(tokens.kinds(), &mut diagnostics); + .check_lines(tokens, &mut diagnostics); } if settings.rules.enabled(Rule::BlanketTypeIgnore) { - pygrep_hooks::rules::blanket_type_ignore(&mut diagnostics, indexer, locator); + pygrep_hooks::rules::blanket_type_ignore(&mut diagnostics, comment_ranges, locator); } if settings.rules.enabled(Rule::EmptyComment) { - pylint::rules::empty_comments(&mut diagnostics, indexer, locator); + pylint::rules::empty_comments(&mut diagnostics, comment_ranges, locator); } if settings .rules .enabled(Rule::AmbiguousUnicodeCharacterComment) { - for range in indexer.comment_ranges() { + for range in comment_ranges { ruff::rules::ambiguous_unicode_character_comment( &mut diagnostics, locator, @@ -68,11 +72,16 @@ pub(crate) fn check_tokens( } if settings.rules.enabled(Rule::CommentedOutCode) { - eradicate::rules::commented_out_code(&mut diagnostics, locator, indexer, settings); + eradicate::rules::commented_out_code(&mut diagnostics, locator, comment_ranges, settings); } if settings.rules.enabled(Rule::UTF8EncodingDeclaration) { - pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, indexer); + pyupgrade::rules::unnecessary_coding_comment( + &mut diagnostics, + locator, + indexer, + comment_ranges, + ); } if settings.rules.enabled(Rule::TabIndentation) { @@ -86,8 +95,13 @@ pub(crate) fn check_tokens( Rule::InvalidCharacterNul, Rule::InvalidCharacterZeroWidthSpace, ]) { - for (token, range) in tokens.kinds() { - pylint::rules::invalid_string_characters(&mut diagnostics, 
token, range, locator); + for token in tokens.up_to_first_unknown() { + pylint::rules::invalid_string_characters( + &mut diagnostics, + token.kind(), + token.range(), + locator, + ); } } @@ -98,7 +112,7 @@ pub(crate) fn check_tokens( ]) { pycodestyle::rules::compound_statements( &mut diagnostics, - tokens.kinds(), + tokens, locator, indexer, source_type, @@ -112,7 +126,7 @@ pub(crate) fn check_tokens( ]) { flake8_implicit_str_concat::rules::implicit( &mut diagnostics, - tokens.kinds(), + tokens, settings, locator, indexer, @@ -124,15 +138,15 @@ pub(crate) fn check_tokens( Rule::TrailingCommaOnBareTuple, Rule::ProhibitedTrailingComma, ]) { - flake8_commas::rules::trailing_commas(&mut diagnostics, tokens.kinds(), locator, indexer); + flake8_commas::rules::trailing_commas(&mut diagnostics, tokens, locator, indexer); } if settings.rules.enabled(Rule::ExtraneousParentheses) { - pyupgrade::rules::extraneous_parentheses(&mut diagnostics, tokens.kinds(), locator); + pyupgrade::rules::extraneous_parentheses(&mut diagnostics, tokens, locator); } if source_type.is_stub() && settings.rules.enabled(Rule::TypeCommentInStub) { - flake8_pyi::rules::type_comment_in_stub(&mut diagnostics, locator, indexer); + flake8_pyi::rules::type_comment_in_stub(&mut diagnostics, locator, comment_ranges); } if settings.rules.any_enabled(&[ @@ -142,7 +156,7 @@ pub(crate) fn check_tokens( Rule::ShebangNotFirstLine, Rule::ShebangMissingPython, ]) { - flake8_executable::rules::from_tokens(&mut diagnostics, path, locator, indexer); + flake8_executable::rules::from_tokens(&mut diagnostics, path, locator, comment_ranges); } if settings.rules.any_enabled(&[ @@ -158,8 +172,7 @@ pub(crate) fn check_tokens( Rule::LineContainsTodo, Rule::LineContainsHack, ]) { - let todo_comments: Vec = indexer - .comment_ranges() + let todo_comments: Vec = comment_ranges .iter() .enumerate() .filter_map(|(i, comment_range)| { @@ -167,12 +180,12 @@ pub(crate) fn check_tokens( TodoComment::from_comment(comment, *comment_range, i) }) .collect(); - flake8_todos::rules::todos(&mut diagnostics, &todo_comments, locator, indexer); + flake8_todos::rules::todos(&mut diagnostics, &todo_comments, locator, comment_ranges); flake8_fixme::rules::todos(&mut diagnostics, &todo_comments); } if settings.rules.enabled(Rule::TooManyNewlinesAtEndOfFile) { - pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens.kinds()); + pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens); } diagnostics.retain(|diagnostic| settings.rules.enabled(diagnostic.kind.rule())); diff --git a/crates/ruff_linter/src/directives.rs b/crates/ruff_linter/src/directives.rs index 8c6c74b83f3a7..398d02696a0a5 100644 --- a/crates/ruff_linter/src/directives.rs +++ b/crates/ruff_linter/src/directives.rs @@ -4,9 +4,9 @@ use std::iter::Peekable; use std::str::FromStr; use bitflags::bitflags; -use ruff_python_ast::StringFlags; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::Tok; +use ruff_python_ast::ModModule; +use ruff_python_parser::{Parsed, TokenKind, Tokens}; +use ruff_python_trivia::CommentRanges; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use ruff_python_index::Indexer; @@ -52,19 +52,19 @@ pub struct Directives { } pub fn extract_directives( - lxr: &[LexResult], + parsed: &Parsed, flags: Flags, locator: &Locator, indexer: &Indexer, ) -> Directives { Directives { noqa_line_for: if flags.intersects(Flags::NOQA) { - extract_noqa_line_for(lxr, locator, indexer) + extract_noqa_line_for(parsed.tokens(), locator, indexer) 
} else { NoqaMapping::default() }, isort: if flags.intersects(Flags::ISORT) { - extract_isort_directives(locator, indexer) + extract_isort_directives(locator, parsed.comment_ranges()) } else { IsortDirectives::default() }, @@ -105,22 +105,22 @@ where } /// Extract a mapping from logical line to noqa line. -fn extract_noqa_line_for(lxr: &[LexResult], locator: &Locator, indexer: &Indexer) -> NoqaMapping { +fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer) -> NoqaMapping { let mut string_mappings = Vec::new(); - for (tok, range) in lxr.iter().flatten() { - match tok { - Tok::EndOfFile => { + for token in tokens.up_to_first_unknown() { + match token.kind() { + TokenKind::EndOfFile => { break; } // For multi-line strings, we expect `noqa` directives on the last line of the // string. - Tok::String { flags, .. } if flags.is_triple_quoted() => { - if locator.contains_line_break(*range) { + TokenKind::String if token.is_triple_quoted_string() => { + if locator.contains_line_break(token.range()) { string_mappings.push(TextRange::new( - locator.line_start(range.start()), - range.end(), + locator.line_start(token.start()), + token.end(), )); } } @@ -197,12 +197,12 @@ fn extract_noqa_line_for(lxr: &[LexResult], locator: &Locator, indexer: &Indexer } /// Extract a set of ranges over which to disable isort. -fn extract_isort_directives(locator: &Locator, indexer: &Indexer) -> IsortDirectives { +fn extract_isort_directives(locator: &Locator, comment_ranges: &CommentRanges) -> IsortDirectives { let mut exclusions: Vec = Vec::default(); let mut splits: Vec = Vec::default(); let mut off: Option = None; - for range in indexer.comment_ranges() { + for range in comment_ranges { let comment_text = locator.slice(range); // `isort` allows for `# isort: skip` and `# isort: skip_file` to include or @@ -379,8 +379,7 @@ impl TodoDirectiveKind { #[cfg(test)] mod tests { - use ruff_python_parser::lexer::LexResult; - use ruff_python_parser::{lexer, Mode}; + use ruff_python_parser::parse_module; use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_python_index::Indexer; @@ -391,12 +390,14 @@ mod tests { }; use crate::noqa::NoqaMapping; + use super::IsortDirectives; + fn noqa_mappings(contents: &str) -> NoqaMapping { - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let parsed = parse_module(contents).unwrap(); let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); - extract_noqa_line_for(&lxr, &locator, &indexer) + extract_noqa_line_for(parsed.tokens(), &locator, &indexer) } #[test] @@ -566,29 +567,26 @@ assert foo, \ ); } + fn isort_directives(contents: &str) -> IsortDirectives { + let parsed = parse_module(contents).unwrap(); + let locator = Locator::new(contents); + extract_isort_directives(&locator, parsed.comment_ranges()) + } + #[test] fn isort_exclusions() { let contents = "x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, - Vec::default() - ); + assert_eq!(isort_directives(contents).exclusions, Vec::default()); let contents = "# isort: off x = 1 y = 2 # isort: on z = x + 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); assert_eq!( - 
extract_isort_directives(&locator, &indexer).exclusions, + isort_directives(contents).exclusions, Vec::from_iter([TextRange::new(TextSize::from(0), TextSize::from(25))]) ); @@ -599,11 +597,8 @@ y = 2 # isort: on z = x + 1 # isort: on"; - let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, + isort_directives(contents).exclusions, Vec::from_iter([TextRange::new(TextSize::from(0), TextSize::from(38))]) ); @@ -611,11 +606,8 @@ z = x + 1 x = 1 y = 2 z = x + 1"; - let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, + isort_directives(contents).exclusions, Vec::from_iter([TextRange::at(TextSize::from(0), contents.text_len())]) ); @@ -623,13 +615,7 @@ z = x + 1"; x = 1 y = 2 z = x + 1"; - let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, - Vec::default() - ); + assert_eq!(isort_directives(contents).exclusions, Vec::default()); let contents = "# isort: off x = 1 @@ -637,13 +623,7 @@ x = 1 y = 2 # isort: skip_file z = x + 1"; - let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).exclusions, - Vec::default() - ); + assert_eq!(isort_directives(contents).exclusions, Vec::default()); } #[test] @@ -651,36 +631,18 @@ z = x + 1"; let contents = "x = 1 y = 2 z = x + 1"; - let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).splits, - Vec::new() - ); + assert_eq!(isort_directives(contents).splits, Vec::new()); let contents = "x = 1 y = 2 # isort: split z = x + 1"; - let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).splits, - vec![TextSize::from(12)] - ); + assert_eq!(isort_directives(contents).splits, vec![TextSize::from(12)]); let contents = "x = 1 y = 2 # isort: split z = x + 1"; - let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect(); - let locator = Locator::new(contents); - let indexer = Indexer::from_tokens(&lxr, &locator); - assert_eq!( - extract_isort_directives(&locator, &indexer).splits, - vec![TextSize::from(13)] - ); + assert_eq!(isort_directives(contents).splits, vec![TextSize::from(13)]); } #[test] diff --git a/crates/ruff_linter/src/doc_lines.rs b/crates/ruff_linter/src/doc_lines.rs index eebe21cec3ca5..d1f780053db75 100644 --- a/crates/ruff_linter/src/doc_lines.rs +++ b/crates/ruff_linter/src/doc_lines.rs @@ -2,28 +2,29 @@ //! standalone comment or a constant string statement.
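
The test rewrites above all reduce to the same parse-once fixture shape; distilled, it looks roughly like this sketch (illustrative only; `fixture` is a hypothetical name, and the APIs are assumed to behave as shown in the hunks above):

    use ruff_python_ast::ModModule;
    use ruff_python_parser::{parse_module, Parsed};
    use ruff_source_file::Locator;

    /// One parse per test input; the `Parsed` result then feeds the locator-,
    /// token-, and comment-based assertions without a second lex pass.
    fn fixture(contents: &str) -> (Parsed<ModModule>, Locator<'_>) {
        let parsed = parse_module(contents).expect("test inputs should be valid Python");
        (parsed, Locator::new(contents))
    }
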
use std::iter::FusedIterator; +use std::slice::Iter; use ruff_python_ast::{self as ast, Stmt, Suite}; -use ruff_python_parser::{TokenKind, TokenKindIter}; +use ruff_python_parser::{Token, TokenKind, Tokens}; use ruff_text_size::{Ranged, TextSize}; use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor}; use ruff_source_file::{Locator, UniversalNewlineIterator}; /// Extract doc lines (standalone comments) from a token sequence. -pub(crate) fn doc_lines_from_tokens(tokens: TokenKindIter) -> DocLines { +pub(crate) fn doc_lines_from_tokens(tokens: &Tokens) -> DocLines { DocLines::new(tokens) } pub(crate) struct DocLines<'a> { - inner: TokenKindIter<'a>, + inner: Iter<'a, Token>, prev: TextSize, } impl<'a> DocLines<'a> { - fn new(tokens: TokenKindIter<'a>) -> Self { + fn new(tokens: &'a Tokens) -> Self { Self { - inner: tokens, + inner: tokens.up_to_first_unknown().iter(), prev: TextSize::default(), } } @@ -35,12 +36,12 @@ impl Iterator for DocLines<'_> { fn next(&mut self) -> Option<Self::Item> { let mut at_start_of_line = true; loop { - let (tok, range) = self.inner.next()?; + let token = self.inner.next()?; - match tok { + match token.kind() { TokenKind::Comment => { if at_start_of_line { - break Some(range.start()); + break Some(token.start()); } } TokenKind::Newline | TokenKind::NonLogicalNewline => { @@ -54,7 +55,7 @@ impl Iterator for DocLines<'_> { } } - self.prev = range.end(); + self.prev = token.end(); } } } diff --git a/crates/ruff_linter/src/fix/edits.rs b/crates/ruff_linter/src/fix/edits.rs index 3d45f1ea01bb1..0901a9f694a2f 100644 --- a/crates/ruff_linter/src/fix/edits.rs +++ b/crates/ruff_linter/src/fix/edits.rs @@ -531,8 +531,9 @@ mod tests { use test_case::test_case; use ruff_diagnostics::{Diagnostic, Edit, Fix}; + use ruff_python_ast::Stmt; use ruff_python_codegen::Stylist; - use ruff_python_parser::{lexer, parse_expression, parse_suite, Mode}; + use ruff_python_parser::{parse_expression, parse_module}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -541,17 +542,21 @@ mod tests { add_to_dunder_all, make_redundant_alias, next_stmt_break, trailing_semicolon, }; + /// Parse the given source using [`Mode::Module`] and return the first statement.
+ fn parse_first_stmt(source: &str) -> Result<Stmt> { + let suite = parse_module(source)?.into_suite(); + Ok(suite.into_iter().next().unwrap()) + } + #[test] fn find_semicolon() -> Result<()> { let contents = "x = 1"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; let locator = Locator::new(contents); assert_eq!(trailing_semicolon(stmt.end(), &locator), None); let contents = "x = 1; y = 1"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; let locator = Locator::new(contents); assert_eq!( trailing_semicolon(stmt.end(), &locator), @@ -559,8 +564,7 @@ mod tests { ); let contents = "x = 1 ; y = 1"; - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; let locator = Locator::new(contents); assert_eq!( trailing_semicolon(stmt.end(), &locator), @@ -572,8 +576,7 @@ x = 1 \ ; y = 1 " .trim(); - let program = parse_suite(contents)?; - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; let locator = Locator::new(contents); assert_eq!( trailing_semicolon(stmt.end(), &locator), @@ -612,12 +615,11 @@ x = 1 \ } #[test] - fn redundant_alias() { + fn redundant_alias() -> Result<()> { let contents = "import x, y as y, z as bees"; - let program = parse_suite(contents).unwrap(); - let stmt = program.first().unwrap(); + let stmt = parse_first_stmt(contents)?; assert_eq!( - make_redundant_alias(["x"].into_iter().map(Cow::from), stmt), + make_redundant_alias(["x"].into_iter().map(Cow::from), &stmt), vec![Edit::range_replacement( String::from("x as x"), TextRange::new(TextSize::new(7), TextSize::new(8)), )], "make just one item redundant" ); assert_eq!( - make_redundant_alias(vec!["x", "y"].into_iter().map(Cow::from), stmt), + make_redundant_alias(vec!["x", "y"].into_iter().map(Cow::from), &stmt), vec![Edit::range_replacement( String::from("x as x"), TextRange::new(TextSize::new(7), TextSize::new(8)), )], "the second item is already a redundant alias" ); assert_eq!( - make_redundant_alias(vec!["x", "z"].into_iter().map(Cow::from), stmt), + make_redundant_alias(vec!["x", "z"].into_iter().map(Cow::from), &stmt), vec![Edit::range_replacement( String::from("x as x"), TextRange::new(TextSize::new(7), TextSize::new(8)), )], "the third item is already aliased to something else" ); + Ok(()) } #[test_case("()", &["x", "y"], r#"("x", "y")"# ; "2 into empty tuple")] @@ -661,13 +664,9 @@ x = 1 \ fn add_to_dunder_all_test(raw: &str, names: &[&str], expect: &str) -> Result<()> { let locator = Locator::new(raw); let edits = { - let expr = parse_expression(raw)?; - let stylist = Stylist::from_tokens( - &lexer::lex(raw, Mode::Expression).collect::<Vec<_>>(), - &locator, - ); - // SUT - add_to_dunder_all(names.iter().copied(), &expr, &stylist) + let parsed = parse_expression(raw)?; + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + add_to_dunder_all(names.iter().copied(), parsed.expr(), &stylist) }; let diag = { use crate::rules::pycodestyle::rules::MissingNewlineAtEndOfFile; diff --git a/crates/ruff_linter/src/importer/insertion.rs b/crates/ruff_linter/src/importer/insertion.rs index 274147a756e84..715405e19ece5 100644 --- a/crates/ruff_linter/src/importer/insertion.rs +++ b/crates/ruff_linter/src/importer/insertion.rs @@ -1,8 +1,8 @@ //! Insert statements into Python code.
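
The `Stylist` construction above follows the same principle; a minimal sketch (assuming `parse_expression` and `Stylist::from_tokens` behave as used in the hunk, with `detect_line_ending` as an illustrative name):

    use anyhow::Result;
    use ruff_python_codegen::Stylist;
    use ruff_python_parser::parse_expression;
    use ruff_source_file::Locator;

    /// Build a `Stylist` from the token stream owned by the parse result,
    /// avoiding the separate lexer pass the old code needed.
    fn detect_line_ending(source: &str) -> Result<String> {
        let parsed = parse_expression(source)?;
        let locator = Locator::new(source);
        let stylist = Stylist::from_tokens(parsed.tokens(), &locator);
        Ok(stylist.line_ending().as_str().to_string())
    }
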
use std::ops::Add; -use ruff_python_ast::{PySourceType, Stmt}; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_ast::Stmt; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_text_size::{Ranged, TextSize}; use ruff_diagnostics::Edit; @@ -145,7 +145,7 @@ impl<'a> Insertion<'a> { mut location: TextSize, locator: &Locator<'a>, stylist: &Stylist, - source_type: PySourceType, + tokens: &Tokens, ) -> Insertion<'a> { enum Awaiting { Colon(u32), @@ -154,40 +154,38 @@ impl<'a> Insertion<'a> { } let mut state = Awaiting::Colon(0); - for (tok, range) in - lexer::lex_starts_at(locator.after(location), source_type.as_mode(), location).flatten() - { + for token in tokens.after(location) { match state { // Iterate until we find the colon indicating the start of the block body. - Awaiting::Colon(depth) => match tok { - Tok::Colon if depth == 0 => { + Awaiting::Colon(depth) => match token.kind() { + TokenKind::Colon if depth == 0 => { state = Awaiting::Newline; } - Tok::Lpar | Tok::Lbrace | Tok::Lsqb => { + TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => { state = Awaiting::Colon(depth.saturating_add(1)); } - Tok::Rpar | Tok::Rbrace | Tok::Rsqb => { + TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => { state = Awaiting::Colon(depth.saturating_sub(1)); } _ => {} }, // Once we've seen the colon, we're looking for a newline; otherwise, there's no // block body (e.g. `if True: pass`). - Awaiting::Newline => match tok { - Tok::Comment(..) => {} - Tok::Newline => { + Awaiting::Newline => match token.kind() { + TokenKind::Comment => {} + TokenKind::Newline => { state = Awaiting::Indent; } _ => { - location = range.start(); + location = token.start(); break; } }, // Once we've seen the newline, we're looking for the indentation of the block body. - Awaiting::Indent => match tok { - Tok::Comment(..) => {} - Tok::NonLogicalNewline => {} - Tok::Indent => { + Awaiting::Indent => match token.kind() { + TokenKind::Comment => {} + TokenKind::NonLogicalNewline => {} + TokenKind::Indent => { // This is like: // ```python // if True: @@ -196,13 +194,13 @@ impl<'a> Insertion<'a> { // Where `range` is the indentation before the `pass` token. 
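
The state machine above can be summarized by a sketch of its first phase (illustrative; `block_colon_after` is a hypothetical helper built on the `tokens.after` API shown in the hunk):

    use ruff_python_parser::{parse_module, TokenKind};
    use ruff_text_size::{Ranged, TextSize};

    /// Scan the tokens that follow `offset` and return the start of the
    /// block-opening `:` at bracket depth zero, mirroring `Awaiting::Colon`.
    fn block_colon_after(source: &str, offset: TextSize) -> Option<TextSize> {
        let parsed = parse_module(source).ok()?;
        let mut depth = 0u32;
        for token in parsed.tokens().after(offset) {
            match token.kind() {
                TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => {
                    depth = depth.saturating_add(1);
                }
                TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => {
                    depth = depth.saturating_sub(1);
                }
                TokenKind::Colon if depth == 0 => return Some(token.start()),
                _ => {}
            }
        }
        None
    }
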
return Insertion::indented( "", - range.start(), + token.start(), stylist.line_ending().as_str(), - locator.slice(range), + locator.slice(token), ); } _ => { - location = range.start(); + location = token.start(); break; } }, @@ -319,9 +317,8 @@ fn match_continuation(s: &str) -> Option<TextSize> { mod tests { use anyhow::Result; - use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; - use ruff_python_parser::{parse_suite, Mode}; + use ruff_python_parser::parse_module; use ruff_source_file::{LineEnding, Locator}; use ruff_text_size::TextSize; @@ -330,11 +327,10 @@ mod tests { #[test] fn start_of_file() -> Result<()> { fn insert(contents: &str) -> Result<Insertion> { - let program = parse_suite(contents)?; - let tokens = ruff_python_parser::tokenize(contents, Mode::Module); + let parsed = parse_module(contents)?; let locator = Locator::new(contents); - let stylist = Stylist::from_tokens(&tokens, &locator); - Ok(Insertion::start_of_file(&program, &locator, &stylist)) + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + Ok(Insertion::start_of_file(parsed.suite(), &locator, &stylist)) } let contents = ""; @@ -442,10 +438,10 @@ x = 1 #[test] fn start_of_block() { fn insert(contents: &str, offset: TextSize) -> Insertion { - let tokens = ruff_python_parser::tokenize(contents, Mode::Module); + let parsed = parse_module(contents).unwrap(); let locator = Locator::new(contents); - let stylist = Stylist::from_tokens(&tokens, &locator); - Insertion::start_of_block(offset, &locator, &stylist, PySourceType::default()) + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + Insertion::start_of_block(offset, &locator, &stylist, parsed.tokens()) } let contents = "if True: pass"; diff --git a/crates/ruff_linter/src/importer/mod.rs b/crates/ruff_linter/src/importer/mod.rs index e59a265a7b19e..51ada8f45d37b 100644 --- a/crates/ruff_linter/src/importer/mod.rs +++ b/crates/ruff_linter/src/importer/mod.rs @@ -7,7 +7,8 @@ use std::error::Error; use anyhow::Result; use libcst_native::{ImportAlias, Name, NameOrAttribute}; -use ruff_python_ast::{self as ast, PySourceType, Stmt}; +use ruff_python_ast::{self as ast, ModModule, Stmt}; +use ruff_python_parser::{Parsed, Tokens}; use ruff_text_size::{Ranged, TextSize}; use ruff_diagnostics::Edit; @@ -27,6 +28,8 @@ mod insertion; pub(crate) struct Importer<'a> { /// The Python AST to which we are adding imports. python_ast: &'a [Stmt], + /// The tokens representing the Python AST. + tokens: &'a Tokens, /// The [`Locator`] for the Python AST. locator: &'a Locator<'a>, /// The [`Stylist`] for the Python AST. @@ -39,12 +42,13 @@ pub(crate) struct Importer<'a> { impl<'a> Importer<'a> { pub(crate) fn new( - python_ast: &'a [Stmt], + parsed: &'a Parsed<ModModule>, locator: &'a Locator<'a>, stylist: &'a Stylist<'a>, ) -> Self { Self { - python_ast, + python_ast: parsed.suite(), + tokens: parsed.tokens(), locator, stylist, runtime_imports: Vec::default(), @@ -121,7 +125,6 @@ impl<'a> Importer<'a> { import: &ImportedMembers, at: TextSize, semantic: &SemanticModel, - source_type: PySourceType, ) -> Result<TypingImportEdit> { // Generate the modified import statement. let content = fix::codemods::retain_imports( @@ -178,7 +181,7 @@ impl<'a> Importer<'a> { // Add the import to a `TYPE_CHECKING` block. let add_import_edit = if let Some(block) = self.preceding_type_checking_block(at) { // Add the import to the `TYPE_CHECKING` block.
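
The `Importer` change above boils down to borrowing two views of one parse; roughly (a sketch, with `AstWithTokens` as an illustrative name):

    use ruff_python_ast::{ModModule, Stmt};
    use ruff_python_parser::{Parsed, Tokens};

    /// Borrow the AST and the token stream from the same `Parsed` value so
    /// that both views are guaranteed to describe the same source text.
    struct AstWithTokens<'a> {
        suite: &'a [Stmt],
        tokens: &'a Tokens,
    }

    impl<'a> AstWithTokens<'a> {
        fn new(parsed: &'a Parsed<ModModule>) -> Self {
            Self {
                suite: parsed.suite(),
                tokens: parsed.tokens(),
            }
        }
    }
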
- self.add_to_type_checking_block(&content, block.start(), source_type) + self.add_to_type_checking_block(&content, block.start()) } else { // Add the import to a new `TYPE_CHECKING` block. self.add_type_checking_block( @@ -455,13 +458,8 @@ impl<'a> Importer<'a> { } /// Add an import statement to an existing `TYPE_CHECKING` block. - fn add_to_type_checking_block( - &self, - content: &str, - at: TextSize, - source_type: PySourceType, - ) -> Edit { - Insertion::start_of_block(at, self.locator, self.stylist, source_type).into_edit(content) + fn add_to_type_checking_block(&self, content: &str, at: TextSize) -> Edit { + Insertion::start_of_block(at, self.locator, self.stylist, self.tokens).into_edit(content) } /// Return the import statement that precedes the given position, if any. diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs index 86d59b6d4eae4..7a36e67d5b2aa 100644 --- a/crates/ruff_linter/src/linter.rs +++ b/crates/ruff_linter/src/linter.rs @@ -10,11 +10,10 @@ use rustc_hash::FxHashMap; use ruff_diagnostics::Diagnostic; use ruff_notebook::Notebook; -use ruff_python_ast::{PySourceType, Suite}; +use ruff_python_ast::{ModModule, PySourceType}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::{AsMode, ParseError, TokenKindIter, Tokens}; +use ruff_python_parser::{ParseError, Parsed}; use ruff_source_file::{Locator, SourceFileBuilder}; use ruff_text_size::Ranged; @@ -82,18 +81,21 @@ pub fn check_path( noqa: flags::Noqa, source_kind: &SourceKind, source_type: PySourceType, - tokens: TokenSource, + parsed: &Parsed<ModModule>, ) -> LinterResult<Vec<Diagnostic>> { // Aggregate all diagnostics. let mut diagnostics = vec![]; let mut error = None; + let tokens = parsed.tokens(); + let comment_ranges = parsed.comment_ranges(); + // Collect doc lines. This requires a rare mix of tokens (for comments) and AST // (for docstrings), which demands special-casing at this level. let use_doc_lines = settings.rules.enabled(Rule::DocLineTooLong); let mut doc_lines = vec![]; if use_doc_lines { - doc_lines.extend(doc_lines_from_tokens(tokens.kinds())); + doc_lines.extend(doc_lines_from_tokens(tokens)); } // Run the token-based rules. @@ -103,7 +105,7 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_tokens()) { diagnostics.extend(check_tokens( - &tokens, + parsed, path, locator, indexer, @@ -120,7 +122,13 @@ pub fn check_path( .iter_enabled() .any(|rule_code| rule_code.lint_source().is_filesystem()) { - diagnostics.extend(check_file_path(path, package, locator, indexer, settings)); + diagnostics.extend(check_file_path( + path, + package, + locator, + comment_ranges, + settings, + )); } // Run the logical line-based rules. @@ -130,7 +138,7 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_logical_lines()) { diagnostics.extend(crate::checkers::logical_lines::check_logical_lines( - &tokens, locator, indexer, stylist, settings, + tokens, locator, indexer, stylist, settings, )); } @@ -145,14 +153,13 @@ pub fn check_path( .iter_enabled() .any(|rule_code| rule_code.lint_source().is_imports()); if use_ast || use_imports || use_doc_lines { - // Parse, if the AST wasn't pre-provided provided.
- match tokens.into_ast(source_kind, source_type) { - Ok(python_ast) => { + match parsed.as_result() { + Ok(parsed) => { let cell_offsets = source_kind.as_ipy_notebook().map(Notebook::cell_offsets); let notebook_index = source_kind.as_ipy_notebook().map(Notebook::index); if use_ast { diagnostics.extend(check_ast( - &python_ast, + parsed, locator, stylist, indexer, @@ -168,7 +175,7 @@ pub fn check_path( } if use_imports { let import_diagnostics = check_imports( - &python_ast, + parsed, locator, indexer, &directives.isort, @@ -182,7 +189,7 @@ pub fn check_path( diagnostics.extend(import_diagnostics); } if use_doc_lines { - doc_lines.extend(doc_lines_from_ast(&python_ast, locator)); + doc_lines.extend(doc_lines_from_ast(parsed.suite(), locator)); } } Err(parse_error) => { @@ -191,8 +198,9 @@ pub fn check_path( // if it's disabled via any of the usual mechanisms (e.g., `noqa`, // `per-file-ignores`), and the easiest way to detect that suppression is // to see if the diagnostic persists to the end of the function. - pycodestyle::rules::syntax_error(&mut diagnostics, &parse_error, locator); - error = Some(parse_error); + pycodestyle::rules::syntax_error(&mut diagnostics, parse_error, locator); + // TODO(dhruvmanila): Remove this clone + error = Some(parse_error.clone()); } } } @@ -210,7 +218,12 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_physical_lines()) { diagnostics.extend(check_physical_lines( - locator, stylist, indexer, &doc_lines, settings, + locator, + stylist, + indexer, + comment_ranges, + &doc_lines, + settings, )); } @@ -222,36 +235,44 @@ pub fn check_path( continue; } let diagnostic = match test_rule { - Rule::StableTestRule => test_rules::StableTestRule::diagnostic(locator, indexer), + Rule::StableTestRule => { + test_rules::StableTestRule::diagnostic(locator, comment_ranges) + } Rule::StableTestRuleSafeFix => { - test_rules::StableTestRuleSafeFix::diagnostic(locator, indexer) + test_rules::StableTestRuleSafeFix::diagnostic(locator, comment_ranges) } Rule::StableTestRuleUnsafeFix => { - test_rules::StableTestRuleUnsafeFix::diagnostic(locator, indexer) + test_rules::StableTestRuleUnsafeFix::diagnostic(locator, comment_ranges) } Rule::StableTestRuleDisplayOnlyFix => { - test_rules::StableTestRuleDisplayOnlyFix::diagnostic(locator, indexer) + test_rules::StableTestRuleDisplayOnlyFix::diagnostic(locator, comment_ranges) + } + Rule::NurseryTestRule => { + test_rules::NurseryTestRule::diagnostic(locator, comment_ranges) + } + Rule::PreviewTestRule => { + test_rules::PreviewTestRule::diagnostic(locator, comment_ranges) } - Rule::NurseryTestRule => test_rules::NurseryTestRule::diagnostic(locator, indexer), - Rule::PreviewTestRule => test_rules::PreviewTestRule::diagnostic(locator, indexer), Rule::DeprecatedTestRule => { - test_rules::DeprecatedTestRule::diagnostic(locator, indexer) + test_rules::DeprecatedTestRule::diagnostic(locator, comment_ranges) } Rule::AnotherDeprecatedTestRule => { - test_rules::AnotherDeprecatedTestRule::diagnostic(locator, indexer) + test_rules::AnotherDeprecatedTestRule::diagnostic(locator, comment_ranges) + } + Rule::RemovedTestRule => { + test_rules::RemovedTestRule::diagnostic(locator, comment_ranges) } - Rule::RemovedTestRule => test_rules::RemovedTestRule::diagnostic(locator, indexer), Rule::AnotherRemovedTestRule => { - test_rules::AnotherRemovedTestRule::diagnostic(locator, indexer) + test_rules::AnotherRemovedTestRule::diagnostic(locator, comment_ranges) } Rule::RedirectedToTestRule => { - 
test_rules::RedirectedToTestRule::diagnostic(locator, indexer) + test_rules::RedirectedToTestRule::diagnostic(locator, comment_ranges) } Rule::RedirectedFromTestRule => { - test_rules::RedirectedFromTestRule::diagnostic(locator, indexer) + test_rules::RedirectedFromTestRule::diagnostic(locator, comment_ranges) } Rule::RedirectedFromPrefixTestRule => { - test_rules::RedirectedFromPrefixTestRule::diagnostic(locator, indexer) + test_rules::RedirectedFromPrefixTestRule::diagnostic(locator, comment_ranges) } _ => unreachable!("All test rules must have an implementation"), }; @@ -288,7 +309,7 @@ pub fn check_path( &mut diagnostics, path, locator, - indexer.comment_ranges(), + comment_ranges, &directives.noqa_line_for, error.is_none(), &per_file_ignores, @@ -350,23 +371,21 @@ pub fn add_noqa_to_path( source_type: PySourceType, settings: &LinterSettings, ) -> Result<usize> { - let contents = source_kind.source_code(); - - // Tokenize once. - let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode()); + // Parse once. + let parsed = ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type); // Map row and column locations to byte slices (lazily). - let locator = Locator::new(contents); + let locator = Locator::new(source_kind.source_code()); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -387,7 +406,7 @@ pub fn add_noqa_to_path( flags::Noqa::Disabled, source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); // Log any parse errors. @@ -409,7 +428,7 @@ pub fn add_noqa_to_path( path, &diagnostics, &locator, - indexer.comment_ranges(), + parsed.comment_ranges(), &settings.external, &directives.noqa_line_for, stylist.line_ending(), @@ -425,23 +444,22 @@ pub fn lint_only( noqa: flags::Noqa, source_kind: &SourceKind, source_type: PySourceType, - data: ParseSource, + source: ParseSource, ) -> LinterResult<Vec<Message>> { - // Tokenize once. - let tokens = data.into_token_source(source_kind, source_type); + let parsed = source.into_parsed(source_kind, source_type); // Map row and column locations to byte slices (lazily). let locator = Locator::new(source_kind.source_code()); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -459,7 +477,7 @@ pub fn lint_only( noqa, source_kind, source_type, - tokens, + &parsed, ); result.map(|diagnostics| diagnostics_to_messages(diagnostics, path, &locator, &directives)) @@ -517,21 +535,22 @@ pub fn lint_fix<'a>( // Continuously fix until the source code stabilizes. loop { - // Tokenize once.
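
Taken together, the rewired entry points share one front end; sketched below (illustrative, assuming the APIs used in these hunks, with `front_end` as a hypothetical name):

    use ruff_python_ast::PySourceType;
    use ruff_python_codegen::Stylist;
    use ruff_python_index::Indexer;
    use ruff_python_parser::parse_unchecked_source;
    use ruff_source_file::Locator;

    /// Parse once; every downstream consumer (stylist, indexer, token rules,
    /// AST rules, directive extraction) borrows from the same `Parsed`.
    fn front_end(source: &str, source_type: PySourceType) {
        let parsed = parse_unchecked_source(source, source_type);
        let locator = Locator::new(source);
        let stylist = Stylist::from_tokens(parsed.tokens(), &locator);
        let indexer = Indexer::from_tokens(parsed.tokens(), &locator);
        // ... `check_path(&parsed, ...)` would run here ...
        let _ = (stylist, indexer, parsed.comment_ranges());
    }
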
- let tokens = ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode()); + // Parse once. + let parsed = + ruff_python_parser::parse_unchecked_source(transformed.source_code(), source_type); // Map row and column locations to byte slices (lazily). let locator = Locator::new(transformed.source_code()); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -549,7 +568,7 @@ pub fn lint_fix<'a>( noqa, &transformed, source_type, - TokenSource::Tokens(tokens), + &parsed, ); if iterations == 0 { @@ -685,70 +704,21 @@ This indicates a bug in Ruff. If you could open an issue at: #[derive(Debug, Clone)] pub enum ParseSource { - /// Extract the tokens and AST from the given source code. + /// Parse the [`Parsed`] from the given source code. None, - /// Use the precomputed tokens and AST. - Precomputed { tokens: Tokens, ast: Suite }, + /// Use the precomputed [`Parsed`]. + Precomputed(Parsed<ModModule>), } impl ParseSource { - /// Convert to a [`TokenSource`], tokenizing if necessary. - fn into_token_source(self, source_kind: &SourceKind, source_type: PySourceType) -> TokenSource { - match self { - Self::None => TokenSource::Tokens(ruff_python_parser::tokenize( - source_kind.source_code(), - source_type.as_mode(), - )), - Self::Precomputed { tokens, ast } => TokenSource::Precomputed { tokens, ast }, - } - } -} - -#[derive(Debug, Clone)] -pub enum TokenSource { - /// Use the precomputed tokens to generate the AST. - Tokens(Tokens), - /// Use the precomputed tokens and AST. - Precomputed { tokens: Tokens, ast: Suite }, -} - -impl TokenSource { - /// Returns an iterator over the [`TokenKind`] and the corresponding range. - /// - /// [`TokenKind`]: ruff_python_parser::TokenKind - pub fn kinds(&self) -> TokenKindIter { - match self { - TokenSource::Tokens(tokens) => tokens.kinds(), - TokenSource::Precomputed { tokens, .. } => TokenKindIter::new(tokens), - } - } -} - -impl Deref for TokenSource { - type Target = [LexResult]; - - fn deref(&self) -> &Self::Target { + /// Consumes the [`ParseSource`] and returns the parsed [`Parsed`], parsing the source code if + /// necessary. + fn into_parsed(self, source_kind: &SourceKind, source_type: PySourceType) -> Parsed<ModModule> { match self { - Self::Tokens(tokens) => tokens, - Self::Precomputed { tokens, .. } => tokens, - } - } -} - -impl TokenSource { - /// Convert to an [`AstSource`], parsing if necessary. - fn into_ast( - self, - source_kind: &SourceKind, - source_type: PySourceType, - ) -> Result<Suite, ParseError> { - match self { - Self::Tokens(tokens) => Ok(ruff_python_parser::parse_program_tokens( - tokens, - source_kind.source_code(), - source_type.is_ipynb(), - )?), - Self::Precomputed { ast, ..
} => Ok(ast), + ParseSource::None => { + ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type) + } + ParseSource::Precomputed(parsed) => parsed, } } } diff --git a/crates/ruff_linter/src/rules/eradicate/detection.rs b/crates/ruff_linter/src/rules/eradicate/detection.rs index fa870287c6572..f2e0229668367 100644 --- a/crates/ruff_linter/src/rules/eradicate/detection.rs +++ b/crates/ruff_linter/src/rules/eradicate/detection.rs @@ -4,7 +4,7 @@ use itertools::Itertools; use once_cell::sync::Lazy; use regex::{Regex, RegexSet}; -use ruff_python_parser::parse_suite; +use ruff_python_parser::parse_module; use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer}; use ruff_text_size::TextSize; @@ -84,7 +84,7 @@ pub(crate) fn comment_contains_code(line: &str, task_tags: &[String]) -> bool { } // Finally, compile the source code. - parse_suite(line).is_ok() + parse_module(line).is_ok() } #[cfg(test)] diff --git a/crates/ruff_linter/src/rules/eradicate/rules/commented_out_code.rs b/crates/ruff_linter/src/rules/eradicate/rules/commented_out_code.rs index 4c17871ae574b..9848f161d9397 100644 --- a/crates/ruff_linter/src/rules/eradicate/rules/commented_out_code.rs +++ b/crates/ruff_linter/src/rules/eradicate/rules/commented_out_code.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use crate::settings::LinterSettings; @@ -47,14 +47,14 @@ impl Violation for CommentedOutCode { pub(crate) fn commented_out_code( diagnostics: &mut Vec<Diagnostic>, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, settings: &LinterSettings, ) { // Skip comments within `/// script` tags. let mut in_script_tag = false; // Iterate over all comments in the document. - for range in indexer.comment_ranges() { + for range in comment_ranges { let line = locator.lines(*range); // Detect `/// script` tags. diff --git a/crates/ruff_linter/src/rules/flake8_bugbear/rules/zip_without_explicit_strict.rs b/crates/ruff_linter/src/rules/flake8_bugbear/rules/zip_without_explicit_strict.rs index 4a1b7ecf6f69f..7e38b527e77d9 100644 --- a/crates/ruff_linter/src/rules/flake8_bugbear/rules/zip_without_explicit_strict.rs +++ b/crates/ruff_linter/src/rules/flake8_bugbear/rules/zip_without_explicit_strict.rs @@ -68,7 +68,7 @@ pub(crate) fn zip_without_explicit_strict(checker: &mut Checker, call: &ast::Exp add_argument( "strict=False", &call.arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ), // If the function call contains `**kwargs`, mark the fix as unsafe. diff --git a/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs b/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs index c1598262b2799..69c1c8598b052 100644 --- a/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs +++ b/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs @@ -2,7 +2,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_index::Indexer; -use ruff_python_parser::{TokenKind, TokenKindIter}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -27,31 +27,31 @@ enum TokenType { /// Simplified token specialized for the task.
#[derive(Copy, Clone)] -struct Token { +struct SimpleToken { ty: TokenType, range: TextRange, } -impl Ranged for Token { +impl Ranged for SimpleToken { fn range(&self) -> TextRange { self.range } } -impl Token { +impl SimpleToken { fn new(ty: TokenType, range: TextRange) -> Self { Self { ty, range } } - fn irrelevant() -> Token { - Token { + fn irrelevant() -> SimpleToken { + SimpleToken { ty: TokenType::Irrelevant, range: TextRange::default(), } } } -impl From<(TokenKind, TextRange)> for Token { +impl From<(TokenKind, TextRange)> for SimpleToken { fn from((tok, range): (TokenKind, TextRange)) -> Self { let ty = match tok { TokenKind::Name => TokenType::Named, @@ -226,13 +226,13 @@ impl AlwaysFixableViolation for ProhibitedTrailingComma { /// COM812, COM818, COM819 pub(crate) fn trailing_commas( diagnostics: &mut Vec<Diagnostic>, - tokens: TokenKindIter, + tokens: &Tokens, locator: &Locator, indexer: &Indexer, ) { let mut fstrings = 0u32; - let tokens = tokens.filter_map(|(token, tok_range)| { - match token { + let simple_tokens = tokens.up_to_first_unknown().iter().filter_map(|token| { + match token.kind() { // Completely ignore comments -- they just interfere with the logic. TokenKind::Comment => None, // F-strings are handled as `String` token type with the complete range @@ -247,15 +247,15 @@ pub(crate) fn trailing_commas( if fstrings == 0 { indexer .fstring_ranges() - .outermost(tok_range.start()) - .map(|range| Token::new(TokenType::String, range)) + .outermost(token.start()) + .map(|range| SimpleToken::new(TokenType::String, range)) } else { None } } _ => { if fstrings == 0 { - Some(Token::from((token, tok_range))) + Some(SimpleToken::from(token.as_tuple())) } else { None } @@ -263,12 +263,12 @@ pub(crate) fn trailing_commas( } }); - let mut prev = Token::irrelevant(); - let mut prev_prev = Token::irrelevant(); + let mut prev = SimpleToken::irrelevant(); + let mut prev_prev = SimpleToken::irrelevant(); let mut stack = vec![Context::new(ContextType::No)]; - for token in tokens { + for token in simple_tokens { if prev.ty == TokenType::NonLogicalNewline && token.ty == TokenType::NonLogicalNewline { // Collapse consecutive newlines to the first one -- trailing commas are // added before the first newline.
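
The rename above frees the `Token` name for the parser's richer token type; the rule-local type keeps only what the rule needs, roughly like this reduction (illustrative; `CommaToken` is a hypothetical name built on the `as_tuple` accessor used above):

    use ruff_python_parser::{Token, TokenKind};
    use ruff_text_size::TextRange;

    /// A rule-local view of a parser token: a coarse classification plus the
    /// source range, and nothing else.
    #[derive(Copy, Clone)]
    struct CommaToken {
        is_comma: bool,
        range: TextRange,
    }

    impl From<&Token> for CommaToken {
        fn from(token: &Token) -> Self {
            let (kind, range) = token.as_tuple();
            Self {
                is_comma: kind == TokenKind::Comma,
                range,
            }
        }
    }
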
@@ -301,9 +301,9 @@ } fn check_token( - token: Token, - prev: Token, - prev_prev: Token, + token: SimpleToken, + prev: SimpleToken, + prev_prev: SimpleToken, context: Context, locator: &Locator, ) -> Option<Diagnostic> { @@ -387,9 +387,9 @@ fn check_token( } fn update_context( - token: Token, - prev: Token, - prev_prev: Token, + token: SimpleToken, + prev: SimpleToken, + prev_prev: SimpleToken, stack: &mut Vec<Context>, ) -> Context { let new_context = match token.ty { diff --git a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs index 5166fbdd3eff2..9d1c59e387c53 100644 --- a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs +++ b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs @@ -139,7 +139,7 @@ pub(crate) fn unnecessary_generator_list(checker: &mut Checker, call: &ast::Expr let range = parenthesized_range( argument.into(), (&call.arguments).into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(argument.range()); diff --git a/crates/ruff_linter/src/rules/flake8_executable/rules/mod.rs b/crates/ruff_linter/src/rules/flake8_executable/rules/mod.rs index 4feb54de314bf..114e7dbef1ddb 100644 --- a/crates/ruff_linter/src/rules/flake8_executable/rules/mod.rs +++ b/crates/ruff_linter/src/rules/flake8_executable/rules/mod.rs @@ -1,7 +1,7 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; pub(crate) use shebang_leading_whitespace::*; pub(crate) use shebang_missing_executable_file::*; @@ -21,10 +21,10 @@ pub(crate) fn from_tokens( diagnostics: &mut Vec<Diagnostic>, path: &Path, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, ) { let mut has_any_shebang = false; - for range in indexer.comment_ranges() { + for range in comment_ranges { let comment = locator.slice(*range); if let Some(shebang) = ShebangDirective::try_extract(comment) { has_any_shebang = true; diff --git a/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs b/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs index b254fe8338426..5cbd3f46e76b8 100644 --- a/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs +++ b/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs @@ -4,9 +4,9 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_index::Indexer; -use ruff_python_parser::{TokenKind, TokenKindIter}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; -use ruff_text_size::TextRange; +use ruff_text_size::{Ranged, TextRange}; use crate::settings::LinterSettings; @@ -92,37 +92,39 @@ impl Violation for MultiLineImplicitStringConcatenation { /// ISC001, ISC002 pub(crate) fn implicit( diagnostics: &mut Vec<Diagnostic>, - tokens: TokenKindIter, + tokens: &Tokens, settings: &LinterSettings, locator: &Locator, indexer: &Indexer, ) { - for ((a_tok, a_range), (b_tok, b_range)) in tokens - .filter(|(token, _)| { - *token != TokenKind::Comment + for (a_token, b_token) in tokens + .up_to_first_unknown() + .iter() + .filter(|token| { + token.kind() != TokenKind::Comment &&
(settings.flake8_implicit_str_concat.allow_multiline - || *token != TokenKind::NonLogicalNewline) + || token.kind() != TokenKind::NonLogicalNewline) }) .tuple_windows() { - let (a_range, b_range) = match (a_tok, b_tok) { - (TokenKind::String, TokenKind::String) => (a_range, b_range), + let (a_range, b_range) = match (a_token.kind(), b_token.kind()) { + (TokenKind::String, TokenKind::String) => (a_token.range(), b_token.range()), (TokenKind::String, TokenKind::FStringStart) => { - match indexer.fstring_ranges().innermost(b_range.start()) { - Some(b_range) => (a_range, b_range), + match indexer.fstring_ranges().innermost(b_token.start()) { + Some(b_range) => (a_token.range(), b_range), None => continue, } } (TokenKind::FStringEnd, TokenKind::String) => { - match indexer.fstring_ranges().innermost(a_range.start()) { - Some(a_range) => (a_range, b_range), + match indexer.fstring_ranges().innermost(a_token.start()) { + Some(a_range) => (a_range, b_token.range()), None => continue, } } (TokenKind::FStringEnd, TokenKind::FStringStart) => { match ( - indexer.fstring_ranges().innermost(a_range.start()), - indexer.fstring_ranges().innermost(b_range.start()), + indexer.fstring_ranges().innermost(a_token.start()), + indexer.fstring_ranges().innermost(b_token.start()), ) { (Some(a_range), Some(b_range)) => (a_range, b_range), _ => continue, diff --git a/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs b/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs index 26cdea8dcb7b6..10c3a591dd0d6 100644 --- a/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs +++ b/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; @@ -45,7 +45,7 @@ pub(crate) fn implicit_namespace_package( path: &Path, package: Option<&Path>, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, project_root: &Path, src: &[PathBuf], ) -> Option<Diagnostic> { @@ -61,8 +61,7 @@ pub(crate) fn implicit_namespace_package( .parent() .is_some_and( |parent| src.iter().any(|src| src == parent)) // Ignore files that contain a shebang.
- && !indexer - .comment_ranges() + && !comment_ranges .first().filter(|range| range.start() == TextSize::from(0)) .is_some_and(|range| ShebangDirective::try_extract(locator.slice(*range)).is_some()) { diff --git a/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_dict_kwargs.rs b/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_dict_kwargs.rs index 61aa28988a397..1f0b799855815 100644 --- a/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_dict_kwargs.rs +++ b/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_dict_kwargs.rs @@ -129,7 +129,7 @@ pub(crate) fn unnecessary_dict_kwargs(checker: &mut Checker, call: &ast::ExprCal parenthesized_range( value.into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(value.range()) diff --git a/crates/ruff_linter/src/rules/flake8_pyi/rules/generic_not_last_base_class.rs b/crates/ruff_linter/src/rules/flake8_pyi/rules/generic_not_last_base_class.rs index c08f74870a867..6c104f0006383 100644 --- a/crates/ruff_linter/src/rules/flake8_pyi/rules/generic_not_last_base_class.rs +++ b/crates/ruff_linter/src/rules/flake8_pyi/rules/generic_not_last_base_class.rs @@ -114,7 +114,7 @@ fn generate_fix( let insertion = add_argument( locator.slice(generic_base), arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), source, ); diff --git a/crates/ruff_linter/src/rules/flake8_pyi/rules/type_comment_in_stub.rs b/crates/ruff_linter/src/rules/flake8_pyi/rules/type_comment_in_stub.rs index c52d3531805d8..c21558f590ee0 100644 --- a/crates/ruff_linter/src/rules/flake8_pyi/rules/type_comment_in_stub.rs +++ b/crates/ruff_linter/src/rules/flake8_pyi/rules/type_comment_in_stub.rs @@ -1,6 +1,6 @@ use once_cell::sync::Lazy; use regex::Regex; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_diagnostics::{Diagnostic, Violation}; @@ -38,9 +38,9 @@ impl Violation for TypeCommentInStub { pub(crate) fn type_comment_in_stub( diagnostics: &mut Vec<Diagnostic>, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, ) { - for range in indexer.comment_ranges() { + for range in comment_ranges { let comment = locator.slice(*range); if TYPE_COMMENT_REGEX.is_match(comment) && !TYPE_IGNORE_REGEX.is_match(comment) { diff --git a/crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs b/crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs index 3aaa7e8b3d71a..674fb0f4bfef2 100644 --- a/crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs +++ b/crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs @@ -284,7 +284,7 @@ pub(crate) fn unittest_assertion( // the assertion is part of a larger expression.
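
The signature changes in these files share one shape: comment-driven rules now receive only `&CommentRanges`. A minimal sketch of such a rule (illustrative; `count_shebang_comments` is a hypothetical name):

    use ruff_python_trivia::CommentRanges;
    use ruff_source_file::Locator;

    /// Count the comments that look like shebang lines, reading each comment's
    /// text through the locator, as the rules above do.
    fn count_shebang_comments(locator: &Locator, comment_ranges: &CommentRanges) -> usize {
        comment_ranges
            .iter()
            .filter(|range| locator.slice(**range).starts_with("#!"))
            .count()
    }
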
if checker.semantic().current_statement().is_expr_stmt() && checker.semantic().current_expression_parent().is_none() - && !checker.indexer().comment_ranges().intersects(expr.range()) + && !checker.parsed().comment_ranges().intersects(expr.range()) { if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) { diagnostic.set_fix(Fix::unsafe_edit(Edit::range_replacement( @@ -292,7 +292,7 @@ pub(crate) fn unittest_assertion( parenthesized_range( expr.into(), checker.semantic().current_statement().into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(expr.range()), @@ -385,7 +385,7 @@ pub(crate) fn unittest_raises_assertion( call.func.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(call, checker.locator()) { @@ -745,7 +745,7 @@ pub(crate) fn composite_condition( let mut diagnostic = Diagnostic::new(PytestCompositeAssertion, stmt.range()); if matches!(composite, CompositionKind::Simple) && msg.is_none() - && !checker.indexer().comment_ranges().intersects(stmt.range()) + && !checker.parsed().comment_ranges().intersects(stmt.range()) && !checker .indexer() .in_multi_statement_line(stmt, checker.locator()) diff --git a/crates/ruff_linter/src/rules/flake8_pytest_style/rules/parametrize.rs b/crates/ruff_linter/src/rules/flake8_pytest_style/rules/parametrize.rs index 4f7cd1c4b4dd6..6ef3b8687ec6f 100644 --- a/crates/ruff_linter/src/rules/flake8_pytest_style/rules/parametrize.rs +++ b/crates/ruff_linter/src/rules/flake8_pytest_style/rules/parametrize.rs @@ -353,7 +353,7 @@ fn check_names(checker: &mut Checker, decorator: &Decorator, expr: &Expr) { let name_range = get_parametrize_name_range( decorator, expr, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(expr.range()); @@ -388,7 +388,7 @@ fn check_names(checker: &mut Checker, decorator: &Decorator, expr: &Expr) { let name_range = get_parametrize_name_range( decorator, expr, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(expr.range()); @@ -681,11 +681,7 @@ fn check_duplicates(checker: &mut Checker, values: &Expr) { let element_end = trailing_comma(element, checker.locator().contents(), values_end); let deletion_range = TextRange::new(previous_end, element_end); - if !checker - .indexer() - .comment_ranges() - .intersects(deletion_range) - { + if !checker.parsed().comment_ranges().intersects(deletion_range) { diagnostic.set_fix(Fix::unsafe_edit(Edit::range_deletion(deletion_range))); } } diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs index fe6a01df95553..241ba9695d716 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_bool_op.rs @@ -527,7 +527,7 @@ pub(crate) fn compare_with_tuple(checker: &mut Checker, expr: &Expr) { // Avoid removing comments. 
if checker - .indexer() + .parsed() .comment_ranges() .has_comments(expr, checker.locator()) { @@ -779,7 +779,7 @@ fn is_short_circuit( parenthesized_range( furthest.into(), expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(furthest.range()) @@ -807,7 +807,7 @@ fn is_short_circuit( parenthesized_range( furthest.into(), expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(furthest.range()) diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_ifexp.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_ifexp.rs index 6b8d107520cf8..f9b9b5752e712 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_ifexp.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_ifexp.rs @@ -164,7 +164,7 @@ pub(crate) fn if_expr_with_true_false( parenthesized_range( test.into(), expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(test.range()), diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_with.rs index 881f4b36914a4..17b04340f527f 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_with.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/ast_with.rs @@ -168,7 +168,7 @@ pub(crate) fn multiple_with_statements( TextRange::new(with_stmt.start(), colon.end()), ); if !checker - .indexer() + .parsed() .comment_ranges() .intersects(TextRange::new(with_stmt.start(), with_stmt.body[0].start())) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/collapsible_if.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/collapsible_if.rs index 8fb4f17fae23b..2a78b971c5634 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/collapsible_if.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/collapsible_if.rs @@ -113,14 +113,10 @@ pub(crate) fn nested_if_statements( ); // The fixer preserves comments in the nested body, but removes comments between // the outer and inner if statements. 
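
The recurring guard in these hunks reduces to one predicate (a sketch; `fix_preserves_comments` is an illustrative name):

    use ruff_python_trivia::CommentRanges;
    use ruff_text_size::TextRange;

    /// A fix that rewrites `replacement` is only offered when no comment
    /// intersects that range, so the fix cannot silently delete comments.
    fn fix_preserves_comments(comment_ranges: &CommentRanges, replacement: TextRange) -> bool {
        !comment_ranges.intersects(replacement)
    }
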
- if !checker - .indexer() - .comment_ranges() - .intersects(TextRange::new( - nested_if.start(), - nested_if.body()[0].start(), - )) - { + if !checker.parsed().comment_ranges().intersects(TextRange::new( + nested_if.start(), + nested_if.body()[0].start(), + )) { match collapse_nested_if(checker.locator(), checker.stylist(), nested_if) { Ok(edit) => { if edit.content().map_or(true, |content| { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_dict_get.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_dict_get.rs index 71144145d2a2a..64a0294816857 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_dict_get.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_dict_get.rs @@ -210,7 +210,7 @@ pub(crate) fn if_else_block_instead_of_dict_get(checker: &mut Checker, stmt_if: stmt_if.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(stmt_if, checker.locator()) { @@ -300,7 +300,7 @@ pub(crate) fn if_exp_instead_of_dict_get( expr.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(expr, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_if_exp.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_if_exp.rs index 0740b997168e1..60deb30459e25 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_if_exp.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_else_block_instead_of_if_exp.rs @@ -143,7 +143,7 @@ pub(crate) fn if_else_block_instead_of_if_exp(checker: &mut Checker, stmt_if: &a stmt_if.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(stmt_if, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_with_same_arms.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_with_same_arms.rs index 6494e262f68c5..e43eb1b7c02a4 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/if_with_same_arms.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/if_with_same_arms.rs @@ -8,8 +8,7 @@ use ruff_python_ast::comparable::ComparableStmt; use ruff_python_ast::parenthesize::parenthesized_range; use ruff_python_ast::stmt_if::{if_elif_branches, IfElifBranch}; use ruff_python_ast::{self as ast, Expr}; -use ruff_python_index::Indexer; -use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer}; +use ruff_python_trivia::{CommentRanges, SimpleTokenKind, SimpleTokenizer}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -74,13 +73,13 @@ pub(crate) fn if_with_same_arms(checker: &mut Checker, stmt_if: &ast::StmtIf) { // ...and the same comments let first_comments = checker - .indexer() + .parsed() .comment_ranges() .comments_in_range(body_range(&current_branch, checker.locator())) .iter() .map(|range| checker.locator().slice(*range)); let second_comments = checker - .indexer() + .parsed() .comment_ranges() .comments_in_range(body_range(following_branch, checker.locator())) .iter() .map(|range| checker.locator().slice(*range)); @@ -100,7 +99,7 @@ pub(crate) fn if_with_same_arms(checker: &mut Checker, stmt_if: &ast::StmtIf) { &current_branch, following_branch, checker.locator(), - checker.indexer(), + checker.parsed().comment_ranges(), ) }); @@ -114,7 +113,7 @@ fn merge_branches( current_branch: &IfElifBranch, following_branch: &IfElifBranch, locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, ) -> Result<Fix> { // Identify the
colon (`:`) at the end of the current branch's test. let Some(current_branch_colon) = @@ -133,7 +132,7 @@ fn merge_branches( let following_branch_test = if let Some(range) = parenthesized_range( following_branch.test.into(), stmt_if.into(), - indexer.comment_ranges(), + comment_ranges, locator.contents(), ) { Cow::Borrowed(locator.slice(range)) diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/key_in_dict.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/key_in_dict.rs index 7ebcd9f9f30b5..619fdddca4490 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/key_in_dict.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/key_in_dict.rs @@ -100,14 +100,14 @@ fn key_in_dict( let left_range = parenthesized_range( left.into(), parent, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(left.range()); let right_range = parenthesized_range( right.into(), parent, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(right.range()); diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs index 1eb1943c89d6d..ac51e2ea68cc8 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/needless_bool.rs @@ -194,7 +194,7 @@ pub(crate) fn needless_bool(checker: &mut Checker, stmt: &Stmt) { // Generate the replacement condition. let condition = if checker - .indexer() + .parsed() .comment_ranges() .has_comments(&range, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/suppressible_exception.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/suppressible_exception.rs index 936a96fe4c891..b94cdacec5604 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/suppressible_exception.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/suppressible_exception.rs @@ -126,7 +126,7 @@ pub(crate) fn suppressible_exception( stmt.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(stmt, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/flake8_todos/rules/todos.rs b/crates/ruff_linter/src/rules/flake8_todos/rules/todos.rs index cbd5a1b2a2cc9..35f1d6039fcf9 100644 --- a/crates/ruff_linter/src/rules/flake8_todos/rules/todos.rs +++ b/crates/ruff_linter/src/rules/flake8_todos/rules/todos.rs @@ -1,6 +1,6 @@ use once_cell::sync::Lazy; use regex::RegexSet; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::{TextLen, TextRange, TextSize}; @@ -235,7 +235,7 @@ pub(crate) fn todos( diagnostics: &mut Vec<Diagnostic>, todo_comments: &[TodoComment], locator: &Locator, - indexer: &Indexer, + comment_ranges: &CommentRanges, ) { for todo_comment in todo_comments { let TodoComment { @@ -256,12 +256,7 @@ pub(crate) fn todos( let mut has_issue_link = false; let mut curr_range = range; - for next_range in indexer - .comment_ranges() - .iter() - .skip(range_index + 1) - .copied() - { + for next_range in comment_ranges.iter().skip(range_index + 1).copied() { // Ensure that next_comment_range is in the same multiline comment "block" as // comment_range.
if !locator diff --git a/crates/ruff_linter/src/rules/flake8_type_checking/rules/typing_only_runtime_import.rs b/crates/ruff_linter/src/rules/flake8_type_checking/rules/typing_only_runtime_import.rs index dc1a7c2e1fb9c..3470430384f57 100644 --- a/crates/ruff_linter/src/rules/flake8_type_checking/rules/typing_only_runtime_import.rs +++ b/crates/ruff_linter/src/rules/flake8_type_checking/rules/typing_only_runtime_import.rs @@ -491,7 +491,6 @@ fn fix_imports(checker: &Checker, node_id: NodeId, imports: &[ImportBinding]) -> }, at, checker.semantic(), - checker.source_type, )? .into_edits(); diff --git a/crates/ruff_linter/src/rules/isort/annotate.rs b/crates/ruff_linter/src/rules/isort/annotate.rs index 012364d71803e..a30cf78708547 100644 --- a/crates/ruff_linter/src/rules/isort/annotate.rs +++ b/crates/ruff_linter/src/rules/isort/annotate.rs @@ -1,4 +1,5 @@ -use ruff_python_ast::{self as ast, PySourceType, Stmt}; +use ruff_python_ast::{self as ast, Stmt}; +use ruff_python_parser::Tokens; use ruff_text_size::{Ranged, TextRange}; use ruff_source_file::Locator; @@ -13,7 +14,7 @@ pub(crate) fn annotate_imports<'a>( comments: Vec<Comment<'a>>, locator: &Locator<'a>, split_on_trailing_comma: bool, - source_type: PySourceType, + tokens: &Tokens, ) -> Vec<AnnotatedImport<'a>> { let mut comments_iter = comments.into_iter().peekable(); @@ -120,7 +121,7 @@ pub(crate) fn annotate_imports<'a>( names: aliases, level: *level, trailing_comma: if split_on_trailing_comma { - trailing_comma(import, locator, source_type) + trailing_comma(import, tokens) } else { TrailingComma::default() }, diff --git a/crates/ruff_linter/src/rules/isort/comments.rs b/crates/ruff_linter/src/rules/isort/comments.rs index daec232098375..d2c88213cc63f 100644 --- a/crates/ruff_linter/src/rules/isort/comments.rs +++ b/crates/ruff_linter/src/rules/isort/comments.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -20,10 +20,9 @@ impl Ranged for Comment<'_> { pub(crate) fn collect_comments<'a>( range: TextRange, locator: &'a Locator, - indexer: &'a Indexer, + comment_ranges: &'a CommentRanges, ) -> Vec<Comment<'a>> { - indexer - .comment_ranges() + comment_ranges .comments_in_range(range) .iter() .map(|range| Comment { diff --git a/crates/ruff_linter/src/rules/isort/helpers.rs b/crates/ruff_linter/src/rules/isort/helpers.rs index 6f519f892335f..50b8b7ffca49b 100644 --- a/crates/ruff_linter/src/rules/isort/helpers.rs +++ b/crates/ruff_linter/src/rules/isort/helpers.rs @@ -1,5 +1,5 @@ -use ruff_python_ast::{PySourceType, Stmt}; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_ast::Stmt; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_trivia::PythonWhitespace; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::Ranged; @@ -8,31 +8,23 @@ use crate::rules::isort::types::TrailingComma; /// Return `true` if a `Stmt::ImportFrom` statement ends with a magic /// trailing comma.
-pub(super) fn trailing_comma( - stmt: &Stmt, - locator: &Locator, - source_type: PySourceType, -) -> TrailingComma { - let contents = locator.slice(stmt); +pub(super) fn trailing_comma(stmt: &Stmt, tokens: &Tokens) -> TrailingComma { let mut count = 0u32; let mut trailing_comma = TrailingComma::Absent; - for (tok, _) in lexer::lex_starts_at(contents, source_type.as_mode(), stmt.start()).flatten() { - if matches!(tok, Tok::Lpar) { - count = count.saturating_add(1); - } - if matches!(tok, Tok::Rpar) { - count = count.saturating_sub(1); + for token in tokens.in_range(stmt.range()) { + match token.kind() { + TokenKind::Lpar => count = count.saturating_add(1), + TokenKind::Rpar => count = count.saturating_sub(1), + _ => {} } if count == 1 { - if matches!( - tok, - Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent | Tok::Comment(_) - ) { - continue; - } else if matches!(tok, Tok::Comma) { - trailing_comma = TrailingComma::Present; - } else { - trailing_comma = TrailingComma::Absent; + match token.kind() { + TokenKind::NonLogicalNewline + | TokenKind::Indent + | TokenKind::Dedent + | TokenKind::Comment => continue, + TokenKind::Comma => trailing_comma = TrailingComma::Present, + _ => trailing_comma = TrailingComma::Absent, } } } diff --git a/crates/ruff_linter/src/rules/isort/mod.rs b/crates/ruff_linter/src/rules/isort/mod.rs index 71af19faf1f54..4a82745e6cc92 100644 --- a/crates/ruff_linter/src/rules/isort/mod.rs +++ b/crates/ruff_linter/src/rules/isort/mod.rs @@ -12,6 +12,7 @@ use normalize::normalize_imports; use order::order_imports; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; +use ruff_python_parser::Tokens; use ruff_source_file::Locator; use settings::Settings; use types::EitherImport::{Import, ImportFrom}; @@ -72,6 +73,7 @@ pub(crate) fn format_imports( source_type: PySourceType, target_version: PythonVersion, settings: &Settings, + tokens: &Tokens, ) -> String { let trailer = &block.trailer; let block = annotate_imports( @@ -79,7 +81,7 @@ pub(crate) fn format_imports( comments, locator, settings.split_on_trailing_comma, - source_type, + tokens, ); // Normalize imports (i.e., deduplicate, aggregate `from` imports). diff --git a/crates/ruff_linter/src/rules/isort/rules/add_required_imports.rs b/crates/ruff_linter/src/rules/isort/rules/add_required_imports.rs index d8564dd5b8735..87265c9cd28d1 100644 --- a/crates/ruff_linter/src/rules/isort/rules/add_required_imports.rs +++ b/crates/ruff_linter/src/rules/isort/rules/add_required_imports.rs @@ -4,9 +4,9 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::is_docstring_stmt; use ruff_python_ast::imports::{Alias, AnyImport, FutureImport, Import, ImportFrom}; -use ruff_python_ast::{self as ast, PySourceType, Stmt, Suite}; +use ruff_python_ast::{self as ast, ModModule, PySourceType, Stmt}; use ruff_python_codegen::Stylist; -use ruff_python_parser::parse_suite; +use ruff_python_parser::{parse_module, Parsed}; use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; @@ -87,13 +87,13 @@ fn includes_import(stmt: &Stmt, target: &AnyImport) -> bool { #[allow(clippy::too_many_arguments)] fn add_required_import( required_import: &AnyImport, - python_ast: &Suite, + parsed: &Parsed, locator: &Locator, stylist: &Stylist, source_type: PySourceType, ) -> Option { // Don't add imports to semantically-empty files. 
- if python_ast.iter().all(is_docstring_stmt) { + if parsed.suite().iter().all(is_docstring_stmt) { return None; } @@ -103,7 +103,8 @@ fn add_required_import( } // If the import is already present in a top-level block, don't add it. - if python_ast + if parsed + .suite() .iter() .any(|stmt| includes_import(stmt, required_import)) { @@ -116,15 +117,14 @@ fn add_required_import( TextRange::default(), ); diagnostic.set_fix(Fix::safe_edit( - Importer::new(python_ast, locator, stylist) - .add_import(required_import, TextSize::default()), + Importer::new(parsed, locator, stylist).add_import(required_import, TextSize::default()), )); Some(diagnostic) } /// I002 pub(crate) fn add_required_imports( - python_ast: &Suite, + parsed: &Parsed, locator: &Locator, stylist: &Stylist, settings: &LinterSettings, @@ -135,7 +135,7 @@ pub(crate) fn add_required_imports( .required_imports .iter() .flat_map(|required_import| { - let Ok(body) = parse_suite(required_import) else { + let Ok(body) = parse_module(required_import).map(Parsed::into_suite) else { error!("Failed to parse required import: `{}`", required_import); return vec![]; }; @@ -165,7 +165,7 @@ pub(crate) fn add_required_imports( }, level: *level, }), - python_ast, + parsed, locator, stylist, source_type, @@ -182,7 +182,7 @@ pub(crate) fn add_required_imports( as_name: name.asname.as_deref(), }, }), - python_ast, + parsed, locator, stylist, source_type, diff --git a/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs b/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs index e571271d08c00..7e0c3be59d6d5 100644 --- a/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs +++ b/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs @@ -5,9 +5,10 @@ use itertools::{EitherOrBoth, Itertools}; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::whitespace::trailing_lines_end; -use ruff_python_ast::{PySourceType, Stmt}; +use ruff_python_ast::{ModModule, PySourceType, Stmt}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; +use ruff_python_parser::Parsed; use ruff_python_trivia::{leading_indentation, textwrap::indent, PythonWhitespace}; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::{Ranged, TextRange}; @@ -78,7 +79,7 @@ fn matches_ignoring_indentation(val1: &str, val2: &str) -> bool { }) } -#[allow(clippy::cast_sign_loss)] +#[allow(clippy::cast_sign_loss, clippy::too_many_arguments)] /// I001 pub(crate) fn organize_imports( block: &Block, @@ -88,6 +89,7 @@ pub(crate) fn organize_imports( settings: &LinterSettings, package: Option<&Path>, source_type: PySourceType, + parsed: &Parsed, ) -> Option { let indentation = locator.slice(extract_indentation_range(&block.imports, locator)); let indentation = leading_indentation(indentation); @@ -106,7 +108,7 @@ pub(crate) fn organize_imports( let comments = comments::collect_comments( TextRange::new(range.start(), locator.full_line_end(range.end())), locator, - indexer, + parsed.comment_ranges(), ); let trailing_line_end = if block.trailer.is_none() { @@ -128,6 +130,7 @@ pub(crate) fn organize_imports( source_type, settings.target_version, &settings.isort, + parsed.tokens(), ); // Expand the span the entire range, including leading and trailing space. 
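
// A minimal sketch of the `Parsed<ModModule>` threading shown in the hunks
// above, assuming the `ruff_python_parser` API from this patch;
// `count_comments` is a hypothetical helper, not part of the codebase. A
// single parse result now exposes the AST, the token stream, and the comment
// ranges, so callers no longer need a separate `Indexer` just to reach
// `comment_ranges()`.
use ruff_python_parser::parse_module;

fn count_comments(source: &str) -> usize {
    let parsed = parse_module(source).expect("source should be valid Python");
    let _first_stmt = parsed.suite().first(); // AST access
    let _token_stream = parsed.tokens(); // token access
    // `&CommentRanges` is iterable, as the rules later in this patch rely on.
    let mut count = 0;
    for _comment in parsed.comment_ranges() {
        count += 1;
    }
    count
}
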
diff --git a/crates/ruff_linter/src/rules/mccabe/rules/function_is_too_complex.rs b/crates/ruff_linter/src/rules/mccabe/rules/function_is_too_complex.rs index f9586b975b6d1..463ef0a4f550f 100644 --- a/crates/ruff_linter/src/rules/mccabe/rules/function_is_too_complex.rs +++ b/crates/ruff_linter/src/rules/mccabe/rules/function_is_too_complex.rs @@ -177,10 +177,15 @@ pub(crate) fn function_is_too_complex( mod tests { use anyhow::Result; - use ruff_python_parser::parse_suite; + use ruff_python_ast::Suite; + use ruff_python_parser::parse_module; use super::get_complexity_number; + fn parse_suite(source: &str) -> Result { + Ok(parse_module(source)?.into_suite()) + } + #[test] fn trivial() -> Result<()> { let source = r" diff --git a/crates/ruff_linter/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff_linter/src/rules/pandas_vet/rules/inplace_argument.rs index e1766b27c4704..ed4446660cb21 100644 --- a/crates/ruff_linter/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff_linter/src/rules/pandas_vet/rules/inplace_argument.rs @@ -93,7 +93,7 @@ pub(crate) fn inplace_argument(checker: &mut Checker, call: &ast::ExprCall) { call, keyword, statement, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ) { diagnostic.set_fix(fix); diff --git a/crates/ruff_linter/src/rules/pycodestyle/overlong.rs b/crates/ruff_linter/src/rules/pycodestyle/overlong.rs index cb1988746c283..b724f15659e3f 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/overlong.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/overlong.rs @@ -2,8 +2,7 @@ use std::ops::Deref; use unicode_width::UnicodeWidthStr; -use ruff_python_index::Indexer; -use ruff_python_trivia::is_pragma_comment; +use ruff_python_trivia::{is_pragma_comment, CommentRanges}; use ruff_source_file::Line; use ruff_text_size::{TextLen, TextRange}; @@ -20,7 +19,7 @@ impl Overlong { /// otherwise. pub(super) fn try_from_line( line: &Line, - indexer: &Indexer, + comment_ranges: &CommentRanges, limit: LineLength, task_tags: &[String], tab_size: IndentWidth, @@ -40,7 +39,7 @@ impl Overlong { } // Strip trailing comments and re-measure the line, if needed. - let line = StrippedLine::from_line(line, indexer, task_tags); + let line = StrippedLine::from_line(line, comment_ranges, task_tags); let width = match &line { StrippedLine::WithoutPragma(line) => { let width = measure(line.as_str(), tab_size); @@ -119,8 +118,8 @@ enum StrippedLine<'a> { impl<'a> StrippedLine<'a> { /// Strip trailing comments from a [`Line`], if the line ends with a pragma comment (like /// `# type: ignore`) or, if necessary, a task comment (like `# TODO`). 
- fn from_line(line: &'a Line<'a>, indexer: &Indexer, task_tags: &[String]) -> Self { - let [comment_range] = indexer.comment_ranges().comments_in_range(line.range()) else { + fn from_line(line: &'a Line<'a>, comment_ranges: &CommentRanges, task_tags: &[String]) -> Self { + let [comment_range] = comment_ranges.comments_in_range(line.range()) else { return Self::Unchanged(line); }; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs index ad6b6478cbef1..172ff40e5b6c9 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs @@ -1,5 +1,7 @@ use itertools::Itertools; use ruff_notebook::CellOffsets; +use ruff_python_parser::Token; +use ruff_python_parser::Tokens; use std::cmp::Ordering; use std::iter::Peekable; use std::num::NonZeroU32; @@ -12,7 +14,7 @@ use ruff_diagnostics::Fix; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; -use ruff_python_parser::{TokenKind, TokenKindIter}; +use ruff_python_parser::TokenKind; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::TextRange; use ruff_text_size::TextSize; @@ -381,7 +383,7 @@ struct LogicalLineInfo { /// Iterator that processes tokens until a full logical line (or comment line) is "built". /// It then returns characteristics of that logical line (see `LogicalLineInfo`). struct LinePreprocessor<'a> { - tokens: TokenKindIter<'a>, + tokens: Peekable>, locator: &'a Locator<'a>, indent_width: IndentWidth, /// The start position of the next logical line. @@ -397,13 +399,13 @@ struct LinePreprocessor<'a> { impl<'a> LinePreprocessor<'a> { fn new( - tokens: TokenKindIter<'a>, + tokens: &'a Tokens, locator: &'a Locator, indent_width: IndentWidth, cell_offsets: Option<&'a CellOffsets>, ) -> LinePreprocessor<'a> { LinePreprocessor { - tokens, + tokens: tokens.up_to_first_unknown().iter().peekable(), locator, line_start: TextSize::new(0), max_preceding_blank_lines: BlankLines::Zero, @@ -424,75 +426,80 @@ impl<'a> Iterator for LinePreprocessor<'a> { // Number of consecutive blank lines directly preceding this logical line. let mut blank_lines = BlankLines::Zero; let mut first_logical_line_token: Option<(LogicalLineKind, TextRange)> = None; - let mut last_token: TokenKind = TokenKind::EndOfFile; + let mut last_token = TokenKind::EndOfFile; let mut parens = 0u32; - while let Some((token, range)) = self.tokens.next() { - if matches!(token, TokenKind::Indent | TokenKind::Dedent) { + while let Some(token) = self.tokens.next() { + let (kind, range) = token.as_tuple(); + if matches!(kind, TokenKind::Indent | TokenKind::Dedent) { continue; } - let (logical_line_kind, first_token_range) = if let Some(first_token_range) = - first_logical_line_token - { - first_token_range - } - // At the start of the line... - else { - // Check if we are at the beginning of a cell in a notebook. - if let Some(ref mut cell_offsets) = self.cell_offsets { - if cell_offsets - .peek() - .is_some_and(|offset| offset == &&self.line_start) - { - self.is_beginning_of_cell = true; - cell_offsets.next(); - blank_lines = BlankLines::Zero; - self.max_preceding_blank_lines = BlankLines::Zero; - } + let (logical_line_kind, first_token_range) = + if let Some(first_token_range) = first_logical_line_token { + first_token_range } + // At the start of the line... + else { + // Check if we are at the beginning of a cell in a notebook. 
+ if let Some(ref mut cell_offsets) = self.cell_offsets { + if cell_offsets + .peek() + .is_some_and(|offset| offset == &&self.line_start) + { + self.is_beginning_of_cell = true; + cell_offsets.next(); + blank_lines = BlankLines::Zero; + self.max_preceding_blank_lines = BlankLines::Zero; + } + } - // An empty line - if token == TokenKind::NonLogicalNewline { - blank_lines.add(range); - - self.line_start = range.end(); + // An empty line + if kind == TokenKind::NonLogicalNewline { + blank_lines.add(range); - continue; - } + self.line_start = range.end(); - is_docstring = token == TokenKind::String; - - let logical_line_kind = match token { - TokenKind::Class => LogicalLineKind::Class, - TokenKind::Comment => LogicalLineKind::Comment, - TokenKind::At => LogicalLineKind::Decorator, - TokenKind::Def => LogicalLineKind::Function, - // Lookahead to distinguish `async def` from `async with`. - TokenKind::Async if matches!(self.tokens.peek(), Some((TokenKind::Def, _))) => { - LogicalLineKind::Function + continue; } - TokenKind::Import => LogicalLineKind::Import, - TokenKind::From => LogicalLineKind::FromImport, - _ => LogicalLineKind::Other, - }; - first_logical_line_token = Some((logical_line_kind, range)); + is_docstring = kind == TokenKind::String; + + let logical_line_kind = match kind { + TokenKind::Class => LogicalLineKind::Class, + TokenKind::Comment => LogicalLineKind::Comment, + TokenKind::At => LogicalLineKind::Decorator, + TokenKind::Def => LogicalLineKind::Function, + // Lookahead to distinguish `async def` from `async with`. + TokenKind::Async + if self + .tokens + .peek() + .is_some_and(|token| token.kind() == TokenKind::Def) => + { + LogicalLineKind::Function + } + TokenKind::Import => LogicalLineKind::Import, + TokenKind::From => LogicalLineKind::FromImport, + _ => LogicalLineKind::Other, + }; + + first_logical_line_token = Some((logical_line_kind, range)); - (logical_line_kind, range) - }; + (logical_line_kind, range) + }; - if !token.is_trivia() { + if !kind.is_trivia() { line_is_comment_only = false; } // A docstring line is composed only of the docstring (TokenKind::String) and trivia tokens. 
// (If a comment follows a docstring, we still count the line as a docstring) - if token != TokenKind::String && !token.is_trivia() { + if kind != TokenKind::String && !kind.is_trivia() { is_docstring = false; } - match token { + match kind { TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => { parens = parens.saturating_add(1); } @@ -538,8 +545,8 @@ impl<'a> Iterator for LinePreprocessor<'a> { _ => {} } - if !token.is_trivia() { - last_token = token; + if !kind.is_trivia() { + last_token = kind; } } @@ -722,7 +729,7 @@ impl<'a> BlankLinesChecker<'a> { } /// E301, E302, E303, E304, E305, E306 - pub(crate) fn check_lines(&self, tokens: TokenKindIter<'a>, diagnostics: &mut Vec) { + pub(crate) fn check_lines(&self, tokens: &Tokens, diagnostics: &mut Vec) { let mut prev_indent_length: Option = None; let mut state = BlankLinesState::default(); let line_preprocessor = diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs index f22c771fc7ae0..bdfb2e9629e46 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs @@ -1,7 +1,9 @@ +use std::slice::Iter; + use ruff_notebook::CellOffsets; use ruff_python_ast::PySourceType; -use ruff_python_parser::{TokenKind, TokenKindIter}; -use ruff_text_size::{TextRange, TextSize}; +use ruff_python_parser::{Token, TokenKind, Tokens}; +use ruff_text_size::{Ranged, TextSize}; use ruff_diagnostics::{AlwaysFixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; @@ -99,7 +101,7 @@ impl AlwaysFixableViolation for UselessSemicolon { /// E701, E702, E703 pub(crate) fn compound_statements( diagnostics: &mut Vec, - mut tokens: TokenKindIter, + tokens: &Tokens, locator: &Locator, indexer: &Indexer, source_type: PySourceType, @@ -125,33 +127,26 @@ pub(crate) fn compound_statements( // This is used to allow `class C: ...`-style definitions in stubs. let mut allow_ellipsis = false; - // Track the bracket depth. - let mut par_count = 0u32; - let mut sqb_count = 0u32; - let mut brace_count = 0u32; + // Track the nesting level. + let mut nesting = 0u32; // Track indentation. let mut indent = 0u32; - while let Some((token, range)) = tokens.next() { - match token { - TokenKind::Lpar => { - par_count = par_count.saturating_add(1); - } - TokenKind::Rpar => { - par_count = par_count.saturating_sub(1); - } - TokenKind::Lsqb => { - sqb_count = sqb_count.saturating_add(1); - } - TokenKind::Rsqb => { - sqb_count = sqb_count.saturating_sub(1); - } - TokenKind::Lbrace => { - brace_count = brace_count.saturating_add(1); + // Use an iterator to allow passing it around. 
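
// A minimal sketch of the lookahead pattern used here, assuming the `Tokens`
// API from this patch: `up_to_first_unknown()` yields a token slice whose
// iterator is `Clone`, so a cheap copy can be handed to a helper for lookahead
// without disturbing the main cursor. `next_is_comment` and `scan` are
// hypothetical helpers, not part of the codebase.
use std::slice::Iter;

use ruff_python_parser::{parse_module, Token, TokenKind};
use ruff_text_size::Ranged;

fn next_is_comment(mut lookahead: Iter<'_, Token>) -> bool {
    lookahead
        .next()
        .is_some_and(|token| token.kind() == TokenKind::Comment)
}

fn scan(source: &str) {
    let parsed = parse_module(source).expect("source should be valid Python");
    let mut token_iter = parsed.tokens().up_to_first_unknown().iter();
    while let Some(token) = token_iter.next() {
        // Cloning a slice iterator is O(1); the main cursor is unaffected.
        if token.kind() == TokenKind::Semi && next_is_comment(token_iter.clone()) {
            println!("semicolon followed by a comment at {:?}", token.range());
        }
    }
}
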
+ let mut token_iter = tokens.up_to_first_unknown().iter(); + + loop { + let Some(token) = token_iter.next() else { + break; + }; + + match token.kind() { + TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => { + nesting = nesting.saturating_add(1); } - TokenKind::Rbrace => { - brace_count = brace_count.saturating_sub(1); + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => { + nesting = nesting.saturating_sub(1); } TokenKind::Ellipsis => { if allow_ellipsis { @@ -168,28 +163,27 @@ pub(crate) fn compound_statements( _ => {} } - if par_count > 0 || sqb_count > 0 || brace_count > 0 { + if nesting > 0 { continue; } - match token { + match token.kind() { TokenKind::Newline => { - if let Some((start, end)) = semi { + if let Some(range) = semi { if !(source_type.is_ipynb() && indent == 0 && cell_offsets - .and_then(|cell_offsets| cell_offsets.containing_range(range.start())) + .and_then(|cell_offsets| cell_offsets.containing_range(token.start())) .is_some_and(|cell_range| { - !has_non_trivia_tokens_till(tokens.clone(), cell_range.end()) + !has_non_trivia_tokens_till(token_iter.clone(), cell_range.end()) })) { - let mut diagnostic = - Diagnostic::new(UselessSemicolon, TextRange::new(start, end)); + let mut diagnostic = Diagnostic::new(UselessSemicolon, range); diagnostic.set_fix(Fix::safe_edit(Edit::deletion( indexer - .preceded_by_continuations(start, locator) - .unwrap_or(start), - end, + .preceded_by_continuations(range.start(), locator) + .unwrap_or(range.start()), + range.end(), ))); diagnostics.push(diagnostic); } @@ -225,14 +219,14 @@ pub(crate) fn compound_statements( || while_.is_some() || with.is_some() { - colon = Some((range.start(), range.end())); + colon = Some(token.range()); // Allow `class C: ...`-style definitions. allow_ellipsis = true; } } TokenKind::Semi => { - semi = Some((range.start(), range.end())); + semi = Some(token.range()); allow_ellipsis = false; } TokenKind::Comment @@ -240,22 +234,16 @@ pub(crate) fn compound_statements( | TokenKind::Dedent | TokenKind::NonLogicalNewline => {} _ => { - if let Some((start, end)) = semi { - diagnostics.push(Diagnostic::new( - MultipleStatementsOnOneLineSemicolon, - TextRange::new(start, end), - )); + if let Some(range) = semi { + diagnostics.push(Diagnostic::new(MultipleStatementsOnOneLineSemicolon, range)); // Reset. semi = None; allow_ellipsis = false; } - if let Some((start, end)) = colon { - diagnostics.push(Diagnostic::new( - MultipleStatementsOnOneLineColon, - TextRange::new(start, end), - )); + if let Some(range) = colon { + diagnostics.push(Diagnostic::new(MultipleStatementsOnOneLineColon, range)); // Reset. colon = None; @@ -276,7 +264,7 @@ pub(crate) fn compound_statements( } } - match token { + match token.kind() { TokenKind::Lambda => { // Reset. 
colon = None; @@ -294,40 +282,40 @@ pub(crate) fn compound_statements( with = None; } TokenKind::Case => { - case = Some((range.start(), range.end())); + case = Some(token.range()); } TokenKind::If => { - if_ = Some((range.start(), range.end())); + if_ = Some(token.range()); } TokenKind::While => { - while_ = Some((range.start(), range.end())); + while_ = Some(token.range()); } TokenKind::For => { - for_ = Some((range.start(), range.end())); + for_ = Some(token.range()); } TokenKind::Try => { - try_ = Some((range.start(), range.end())); + try_ = Some(token.range()); } TokenKind::Except => { - except = Some((range.start(), range.end())); + except = Some(token.range()); } TokenKind::Finally => { - finally = Some((range.start(), range.end())); + finally = Some(token.range()); } TokenKind::Elif => { - elif = Some((range.start(), range.end())); + elif = Some(token.range()); } TokenKind::Else => { - else_ = Some((range.start(), range.end())); + else_ = Some(token.range()); } TokenKind::Class => { - class = Some((range.start(), range.end())); + class = Some(token.range()); } TokenKind::With => { - with = Some((range.start(), range.end())); + with = Some(token.range()); } TokenKind::Match => { - match_ = Some((range.start(), range.end())); + match_ = Some(token.range()); } _ => {} }; @@ -336,13 +324,13 @@ pub(crate) fn compound_statements( /// Returns `true` if there are any non-trivia tokens from the given token /// iterator till the given end offset. -fn has_non_trivia_tokens_till(tokens: TokenKindIter, cell_end: TextSize) -> bool { - for (token, tok_range) in tokens { - if tok_range.start() >= cell_end { +fn has_non_trivia_tokens_till(tokens: Iter<'_, Token>, cell_end: TextSize) -> bool { + for token in tokens { + if token.start() >= cell_end { return false; } if !matches!( - token, + token.kind(), TokenKind::Newline | TokenKind::Comment | TokenKind::EndOfFile diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs index b13c461e19fc2..5661f62036f66 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Line; use crate::rules::pycodestyle::overlong::Overlong; @@ -84,13 +84,13 @@ impl Violation for DocLineTooLong { /// W505 pub(crate) fn doc_line_too_long( line: &Line, - indexer: &Indexer, + comment_ranges: &CommentRanges, settings: &LinterSettings, ) -> Option { let limit = settings.pycodestyle.max_doc_length?; Overlong::try_from_line( line, - indexer, + comment_ranges, limit, if settings.pycodestyle.ignore_overlong_task_comments { &settings.task_tags diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/line_too_long.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/line_too_long.rs index a722344fa050b..54b1bf09fd15f 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/line_too_long.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/line_too_long.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Line; use crate::rules::pycodestyle::overlong::Overlong; @@ -82,14 +82,14 @@ impl Violation for LineTooLong { /// E501 
pub(crate) fn line_too_long( line: &Line, - indexer: &Indexer, + comment_ranges: &CommentRanges, settings: &LinterSettings, ) -> Option { let limit = settings.pycodestyle.max_line_length; Overlong::try_from_line( line, - indexer, + comment_ranges, limit, if settings.pycodestyle.ignore_overlong_task_comments { &settings.task_tags diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/literal_comparisons.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/literal_comparisons.rs index 68a9ba3d7f4e7..3489aa0c56e6a 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/literal_comparisons.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/literal_comparisons.rs @@ -324,7 +324,7 @@ pub(crate) fn literal_comparisons(checker: &mut Checker, compare: &ast::ExprComp &ops, &compare.comparators, compare.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ); for diagnostic in &mut diagnostics { diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs index 606972bcf0c38..a483187e574cd 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs @@ -14,10 +14,9 @@ use std::fmt::{Debug, Formatter}; use std::iter::FusedIterator; use bitflags::bitflags; -use ruff_python_parser::lexer::LexResult; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; -use ruff_python_parser::TokenKind; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_trivia::is_python_whitespace; use ruff_source_file::Locator; @@ -60,17 +59,16 @@ pub(crate) struct LogicalLines<'a> { } impl<'a> LogicalLines<'a> { - pub(crate) fn from_tokens(tokens: &'a [LexResult], locator: &'a Locator<'a>) -> Self { + pub(crate) fn from_tokens(tokens: &Tokens, locator: &'a Locator<'a>) -> Self { assert!(u32::try_from(tokens.len()).is_ok()); let mut builder = LogicalLinesBuilder::with_capacity(tokens.len()); let mut parens = 0u32; - for (token, range) in tokens.iter().flatten() { - let token_kind = TokenKind::from_token(token); - builder.push_token(token_kind, *range); + for token in tokens.up_to_first_unknown() { + builder.push_token(token.kind(), token.range()); - match token_kind { + match token.kind() { TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => { parens = parens.saturating_add(1); } @@ -506,9 +504,7 @@ struct Line { #[cfg(test)] mod tests { - use ruff_python_parser::lexer::LexResult; - use ruff_python_parser::{lexer, Mode}; - + use ruff_python_parser::parse_module; use ruff_source_file::Locator; use super::LogicalLines; @@ -592,9 +588,9 @@ if False: } fn assert_logical_lines(contents: &str, expected: &[&str]) { - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let parsed = parse_module(contents).unwrap(); let locator = Locator::new(contents); - let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) + let actual: Vec = LogicalLines::from_tokens(parsed.tokens(), &locator) .into_iter() .map(|line| line.text_trimmed()) .map(ToString::to_string) diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/not_tests.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/not_tests.rs index 6990d66f76ac4..1602e84f79644 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/not_tests.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/not_tests.rs @@ -104,7 +104,7 @@ pub(crate) fn not_tests(checker: &mut Checker, unary_op: &ast::ExprUnaryOp) { 
&[CmpOp::NotIn], comparators, unary_op.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ), unary_op.range(), @@ -125,7 +125,7 @@ pub(crate) fn not_tests(checker: &mut Checker, unary_op: &ast::ExprUnaryOp) { &[CmpOp::IsNot], comparators, unary_op.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ), unary_op.range(), diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs index f117210d3263d..c34ce2216bc5a 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs @@ -1,7 +1,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_parser::{TokenKind, TokenKindIter}; -use ruff_text_size::{TextRange, TextSize}; +use ruff_python_parser::{TokenKind, Tokens}; +use ruff_text_size::{Ranged, TextRange, TextSize}; /// ## What it does /// Checks for files with multiple trailing blank lines. @@ -54,22 +54,19 @@ impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile { } /// W391 -pub(crate) fn too_many_newlines_at_end_of_file( - diagnostics: &mut Vec, - tokens: TokenKindIter, -) { +pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec, tokens: &Tokens) { let mut num_trailing_newlines = 0u32; let mut start: Option = None; let mut end: Option = None; // Count the number of trailing newlines. - for (token, range) in tokens.rev() { - match token { + for token in tokens.up_to_first_unknown().iter().rev() { + match token.kind() { TokenKind::NonLogicalNewline | TokenKind::Newline => { if num_trailing_newlines == 0 { - end = Some(range.end()); + end = Some(token.end()); } - start = Some(range.end()); + start = Some(token.end()); num_trailing_newlines += 1; } TokenKind::Dedent => continue, diff --git a/crates/ruff_linter/src/rules/pyflakes/mod.rs b/crates/ruff_linter/src/rules/pyflakes/mod.rs index 81bc61c1f1850..f88cc6f285272 100644 --- a/crates/ruff_linter/src/rules/pyflakes/mod.rs +++ b/crates/ruff_linter/src/rules/pyflakes/mod.rs @@ -17,12 +17,12 @@ mod tests { use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; - use ruff_python_parser::AsMode; + use ruff_python_trivia::textwrap::dedent; use ruff_source_file::Locator; use ruff_text_size::Ranged; - use crate::linter::{check_path, LinterResult, TokenSource}; + use crate::linter::{check_path, LinterResult}; use crate::registry::{AsRule, Linter, Rule}; use crate::rules::pyflakes; use crate::settings::types::PreviewMode; @@ -638,12 +638,13 @@ mod tests { let source_type = PySourceType::default(); let source_kind = SourceKind::Python(contents.to_string()); let settings = LinterSettings::for_rules(Linter::Pyflakes.rules()); - let tokens = ruff_python_parser::tokenize(&contents, source_type.as_mode()); + let parsed = + ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type); let locator = Locator::new(&contents); - let stylist = Stylist::from_tokens(&tokens, &locator); - let indexer = Indexer::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &tokens, + &parsed, 
directives::Flags::from_settings(&settings), &locator, &indexer, @@ -662,7 +663,7 @@ mod tests { flags::Noqa::Enabled, &source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); diagnostics.sort_by_key(Ranged::start); let actual = diagnostics diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs index aaf4761679b4c..5301e1cada51a 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs @@ -4,8 +4,8 @@ use ruff_python_ast::{CmpOp, Expr}; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers; -use ruff_python_parser::{lexer, Mode, Tok}; -use ruff_text_size::{Ranged, TextRange, TextSize}; +use ruff_python_parser::{TokenKind, Tokens}; +use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; @@ -96,7 +96,7 @@ pub(crate) fn invalid_literal_comparison( { let mut diagnostic = Diagnostic::new(IsLiteral { cmp_op: op.into() }, expr.range()); if lazy_located.is_none() { - lazy_located = Some(locate_cmp_ops(expr, checker.locator().contents())); + lazy_located = Some(locate_cmp_ops(expr, checker.parsed().tokens())); } if let Some(located_op) = lazy_located.as_ref().and_then(|located| located.get(index)) { assert_eq!(located_op.op, *op); @@ -110,7 +110,7 @@ pub(crate) fn invalid_literal_comparison( } { diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( content, - located_op.range + expr.start(), + located_op.range, ))); } } else { @@ -138,102 +138,83 @@ impl From<&CmpOp> for IsCmpOp { } } -/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate -/// ranges. +/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate ranges. /// -/// `RustPython` doesn't include line and column information on [`CmpOp`] nodes. -/// `CPython` doesn't either. This method iterates over the token stream and -/// re-identifies [`CmpOp`] nodes, annotating them with valid ranges. -fn locate_cmp_ops(expr: &Expr, source: &str) -> Vec { - // If `Expr` is a multi-line expression, we need to parenthesize it to - // ensure that it's lexed correctly. - let contents = &source[expr.range()]; - let parenthesized_contents = format!("({contents})"); - let mut tok_iter = lexer::lex(&parenthesized_contents, Mode::Expression) - .flatten() - .skip(1) - .map(|(tok, range)| (tok, range - TextSize::from(1))) - .filter(|(tok, _)| !matches!(tok, Tok::NonLogicalNewline | Tok::Comment(_))) +/// This method iterates over the token stream and re-identifies [`CmpOp`] nodes, annotating them +/// with valid ranges. +fn locate_cmp_ops(expr: &Expr, tokens: &Tokens) -> Vec { + let mut tok_iter = tokens + .in_range(expr.range()) + .iter() + .filter(|token| !token.is_trivia()) .peekable(); let mut ops: Vec = vec![]; - // Track the bracket depth. - let mut par_count = 0u32; - let mut sqb_count = 0u32; - let mut brace_count = 0u32; + // Track the nesting level. 
+ let mut nesting = 0u32; loop { - let Some((tok, range)) = tok_iter.next() else { + let Some(token) = tok_iter.next() else { break; }; - match tok { - Tok::Lpar => { - par_count = par_count.saturating_add(1); + match token.kind() { + TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => { + nesting = nesting.saturating_add(1); } - Tok::Rpar => { - par_count = par_count.saturating_sub(1); - } - Tok::Lsqb => { - sqb_count = sqb_count.saturating_add(1); - } - Tok::Rsqb => { - sqb_count = sqb_count.saturating_sub(1); - } - Tok::Lbrace => { - brace_count = brace_count.saturating_add(1); - } - Tok::Rbrace => { - brace_count = brace_count.saturating_sub(1); + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => { + nesting = nesting.saturating_sub(1); } _ => {} } - if par_count > 0 || sqb_count > 0 || brace_count > 0 { + if nesting > 0 { continue; } - match tok { - Tok::Not => { - if let Some((_, next_range)) = tok_iter.next_if(|(tok, _)| tok.is_in()) { + match token.kind() { + TokenKind::Not => { + if let Some(next_token) = tok_iter.next_if(|token| token.kind() == TokenKind::In) { ops.push(LocatedCmpOp::new( - TextRange::new(range.start(), next_range.end()), + TextRange::new(token.start(), next_token.end()), CmpOp::NotIn, )); } } - Tok::In => { - ops.push(LocatedCmpOp::new(range, CmpOp::In)); + TokenKind::In => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::In)); } - Tok::Is => { - let op = if let Some((_, next_range)) = tok_iter.next_if(|(tok, _)| tok.is_not()) { + TokenKind::Is => { + let op = if let Some(next_token) = + tok_iter.next_if(|token| token.kind() == TokenKind::Not) + { LocatedCmpOp::new( - TextRange::new(range.start(), next_range.end()), + TextRange::new(token.start(), next_token.end()), CmpOp::IsNot, ) } else { - LocatedCmpOp::new(range, CmpOp::Is) + LocatedCmpOp::new(token.range(), CmpOp::Is) }; ops.push(op); } - Tok::NotEqual => { - ops.push(LocatedCmpOp::new(range, CmpOp::NotEq)); + TokenKind::NotEqual => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::NotEq)); } - Tok::EqEqual => { - ops.push(LocatedCmpOp::new(range, CmpOp::Eq)); + TokenKind::EqEqual => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::Eq)); } - Tok::GreaterEqual => { - ops.push(LocatedCmpOp::new(range, CmpOp::GtE)); + TokenKind::GreaterEqual => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::GtE)); } - Tok::Greater => { - ops.push(LocatedCmpOp::new(range, CmpOp::Gt)); + TokenKind::Greater => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::Gt)); } - Tok::LessEqual => { - ops.push(LocatedCmpOp::new(range, CmpOp::LtE)); + TokenKind::LessEqual => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::LtE)); } - Tok::Less => { - ops.push(LocatedCmpOp::new(range, CmpOp::Lt)); + TokenKind::Less => { + ops.push(LocatedCmpOp::new(token.range(), CmpOp::Lt)); } _ => {} } @@ -266,12 +247,16 @@ mod tests { use super::{locate_cmp_ops, LocatedCmpOp}; + fn extract_cmp_op_locations(source: &str) -> Result> { + let parsed = parse_expression(source)?; + Ok(locate_cmp_ops(parsed.expr(), parsed.tokens())) + } + #[test] - fn extract_cmp_op_location() -> Result<()> { + fn test_locate_cmp_ops() -> Result<()> { let contents = "x == 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::Eq @@ -279,9 +264,8 @@ mod tests { ); let contents = "x != 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + 
extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::NotEq @@ -289,9 +273,8 @@ mod tests { ); let contents = "x is 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::Is @@ -299,9 +282,8 @@ mod tests { ); let contents = "x is not 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(8), CmpOp::IsNot @@ -309,9 +291,8 @@ mod tests { ); let contents = "x in 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::In @@ -319,9 +300,8 @@ mod tests { ); let contents = "x not in 1"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(8), CmpOp::NotIn @@ -329,9 +309,8 @@ mod tests { ); let contents = "x != (1 is not 2)"; - let expr = parse_expression(contents)?; assert_eq!( - locate_cmp_ops(&expr, contents), + extract_cmp_op_locations(contents)?, vec![LocatedCmpOp::new( TextSize::from(2)..TextSize::from(4), CmpOp::NotEq diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/repeated_keys.rs b/crates/ruff_linter/src/rules/pyflakes/rules/repeated_keys.rs index 5575e15410f31..66fcfdc0eaefd 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/repeated_keys.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/repeated_keys.rs @@ -169,7 +169,7 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) { parenthesized_range( dict.value(i - 1).into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or_else(|| dict.value(i - 1).range()) @@ -177,7 +177,7 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) { parenthesized_range( dict.value(i).into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or_else(|| dict.value(i).range()) @@ -201,7 +201,7 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) { parenthesized_range( dict.value(i - 1).into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or_else(|| dict.value(i - 1).range()) @@ -209,7 +209,7 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) { parenthesized_range( dict.value(i).into(), dict.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or_else(|| dict.value(i).range()) diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs b/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs index b84fcd27d6135..934a4d0af97c8 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs @@ -4,10 +4,9 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::contains_effect; use ruff_python_ast::parenthesize::parenthesized_range; -use 
ruff_python_ast::{self as ast, PySourceType, Stmt}; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_ast::{self as ast, Stmt}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_semantic::{Binding, Scope}; -use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::Checker; @@ -65,22 +64,13 @@ impl Violation for UnusedVariable { } /// Return the [`TextRange`] of the token before the next match of the predicate -fn match_token_before( - location: TextSize, - locator: &Locator, - source_type: PySourceType, - f: F, -) -> Option +fn match_token_before(tokens: &Tokens, location: TextSize, f: F) -> Option where - F: Fn(Tok) -> bool, + F: Fn(TokenKind) -> bool, { - let contents = locator.after(location); - for ((_, range), (tok, _)) in lexer::lex_starts_at(contents, source_type.as_mode(), location) - .flatten() - .tuple_windows() - { - if f(tok) { - return Some(range); + for (prev, current) in tokens.after(location).iter().tuple_windows() { + if f(current.kind()) { + return Some(prev.range()); } } None @@ -88,55 +78,31 @@ where /// Return the [`TextRange`] of the token after the next match of the predicate, skipping over /// any bracketed expressions. -fn match_token_after( - location: TextSize, - locator: &Locator, - source_type: PySourceType, - f: F, -) -> Option +fn match_token_after(tokens: &Tokens, location: TextSize, f: F) -> Option where - F: Fn(Tok) -> bool, + F: Fn(TokenKind) -> bool, { - let contents = locator.after(location); - // Track the bracket depth. - let mut par_count = 0u32; - let mut sqb_count = 0u32; - let mut brace_count = 0u32; + let mut nesting = 0u32; - for ((tok, _), (_, range)) in lexer::lex_starts_at(contents, source_type.as_mode(), location) - .flatten() - .tuple_windows() - { - match tok { - Tok::Lpar => { - par_count = par_count.saturating_add(1); - } - Tok::Lsqb => { - sqb_count = sqb_count.saturating_add(1); - } - Tok::Lbrace => { - brace_count = brace_count.saturating_add(1); + for (current, next) in tokens.after(location).iter().tuple_windows() { + match current.kind() { + TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => { + nesting = nesting.saturating_add(1); } - Tok::Rpar => { - par_count = par_count.saturating_sub(1); - } - Tok::Rsqb => { - sqb_count = sqb_count.saturating_sub(1); - } - Tok::Rbrace => { - brace_count = brace_count.saturating_sub(1); + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => { + nesting = nesting.saturating_sub(1); } _ => {} } // If we're in nested brackets, continue. - if par_count > 0 || sqb_count > 0 || brace_count > 0 { + if nesting > 0 { continue; } - if f(tok) { - return Some(range); + if f(current.kind()) { + return Some(next.range()); } } None @@ -144,61 +110,34 @@ where /// Return the [`TextRange`] of the token matching the predicate or the first mismatched /// bracket, skipping over any bracketed expressions. -fn match_token_or_closing_brace( - location: TextSize, - locator: &Locator, - source_type: PySourceType, - f: F, -) -> Option +fn match_token_or_closing_brace(tokens: &Tokens, location: TextSize, f: F) -> Option where - F: Fn(Tok) -> bool, + F: Fn(TokenKind) -> bool, { - let contents = locator.after(location); + // Track the nesting level. + let mut nesting = 0u32; - // Track the bracket depth. 
- let mut par_count = 0u32; - let mut sqb_count = 0u32; - let mut brace_count = 0u32; - - for (tok, range) in lexer::lex_starts_at(contents, source_type.as_mode(), location).flatten() { - match tok { - Tok::Lpar => { - par_count = par_count.saturating_add(1); - } - Tok::Lsqb => { - sqb_count = sqb_count.saturating_add(1); + for token in tokens.after(location) { + match token.kind() { + TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => { + nesting = nesting.saturating_add(1); } - Tok::Lbrace => { - brace_count = brace_count.saturating_add(1); - } - Tok::Rpar => { - if par_count == 0 { - return Some(range); - } - par_count = par_count.saturating_sub(1); - } - Tok::Rsqb => { - if sqb_count == 0 { - return Some(range); - } - sqb_count = sqb_count.saturating_sub(1); - } - Tok::Rbrace => { - if brace_count == 0 { - return Some(range); + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => { + if nesting == 0 { + return Some(token.range()); } - brace_count = brace_count.saturating_sub(1); + nesting = nesting.saturating_sub(1); } _ => {} } // If we're in nested brackets, continue. - if par_count > 0 || sqb_count > 0 || brace_count > 0 { + if nesting > 0 { continue; } - if f(tok) { - return Some(range); + if f(token.kind()) { + return Some(token.range()); } } None @@ -226,18 +165,16 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option { let start = parenthesized_range( target.into(), statement.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(target.range()) .start(); - let end = match_token_after( - target.end(), - checker.locator(), - checker.source_type, - |tok| tok == Tok::Equal, - )? - .start(); + let end = + match_token_after(checker.parsed().tokens(), target.end(), |token| { + token == TokenKind::Equal + })? + .start(); let edit = Edit::deletion(start, end); Some(Fix::unsafe_edit(edit)) } else { @@ -269,11 +206,10 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option { // If the expression is complex (`x = foo()`), remove the assignment, // but preserve the right-hand side. let start = statement.start(); - let end = - match_token_after(start, checker.locator(), checker.source_type, |tok| { - tok == Tok::Equal - })? - .start(); + let end = match_token_after(checker.parsed().tokens(), start, |token| { + token == TokenKind::Equal + })? + .start(); let edit = Edit::deletion(start, end); Some(Fix::unsafe_edit(edit)) } else { @@ -293,21 +229,18 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option { if optional_vars.range() == binding.range() { // Find the first token before the `as` keyword. let start = match_token_before( + checker.parsed().tokens(), item.context_expr.start(), - checker.locator(), - checker.source_type, - |tok| tok == Tok::As, + |token| token == TokenKind::As, )? .end(); // Find the first colon, comma, or closing bracket after the `as` keyword. - let end = match_token_or_closing_brace( - start, - checker.locator(), - checker.source_type, - |tok| tok == Tok::Colon || tok == Tok::Comma, - )? - .start(); + let end = + match_token_or_closing_brace(checker.parsed().tokens(), start, |token| { + token == TokenKind::Colon || token == TokenKind::Comma + })? 
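
// A minimal sketch of the `tokens.after(location)` + `tuple_windows` scanning
// used by these helpers, assuming the API shown in this patch;
// `range_after_eq` is a hypothetical helper, not part of the codebase. It
// returns the range of the token that follows the first `=` found outside of
// any brackets.
use itertools::Itertools;
use ruff_python_parser::{parse_module, TokenKind};
use ruff_text_size::{Ranged, TextRange, TextSize};

fn range_after_eq(source: &str, location: TextSize) -> Option<TextRange> {
    let parsed = parse_module(source).expect("source should be valid Python");
    let mut nesting = 0u32;
    for (current, next) in parsed.tokens().after(location).iter().tuple_windows() {
        match current.kind() {
            TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => {
                nesting = nesting.saturating_add(1);
            }
            TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => {
                nesting = nesting.saturating_sub(1);
            }
            TokenKind::Equal if nesting == 0 => return Some(next.range()),
            _ => {}
        }
    }
    None
}
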
+ .start(); let edit = Edit::deletion(start, end); return Some(Fix::unsafe_edit(edit)); diff --git a/crates/ruff_linter/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs b/crates/ruff_linter/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs index f594be42940d2..91b08c9c08a86 100644 --- a/crates/ruff_linter/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs +++ b/crates/ruff_linter/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs @@ -5,7 +5,7 @@ use regex::Regex; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::TextSize; @@ -51,10 +51,10 @@ impl Violation for BlanketTypeIgnore { /// PGH003 pub(crate) fn blanket_type_ignore( diagnostics: &mut Vec, - indexer: &Indexer, + comment_ranges: &CommentRanges, locator: &Locator, ) { - for range in indexer.comment_ranges() { + for range in comment_ranges { let line = locator.slice(*range); // Match, e.g., `# type: ignore` or `# type: ignore[attr-defined]`. diff --git a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs index 3fa235beb97c5..dfca0b6f209c6 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs @@ -1,7 +1,6 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; -use ruff_python_trivia::is_python_whitespace; +use ruff_python_trivia::{is_python_whitespace, CommentRanges}; use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; @@ -45,12 +44,12 @@ impl Violation for EmptyComment { /// PLR2044 pub(crate) fn empty_comments( diagnostics: &mut Vec, - indexer: &Indexer, + comment_ranges: &CommentRanges, locator: &Locator, ) { - let block_comments = indexer.comment_ranges().block_comments(locator); + let block_comments = comment_ranges.block_comments(locator); - for range in indexer.comment_ranges() { + for range in comment_ranges { // Ignore comments that are part of multi-line "comment blocks". 
if block_comments.binary_search(&range.start()).is_ok() { continue; diff --git a/crates/ruff_linter/src/rules/pylint/rules/if_stmt_min_max.rs b/crates/ruff_linter/src/rules/pylint/rules/if_stmt_min_max.rs index 7bfdae8e14159..0e9eceb984f26 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/if_stmt_min_max.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/if_stmt_min_max.rs @@ -160,7 +160,7 @@ pub(crate) fn if_stmt_min_max(checker: &mut Checker, stmt_if: &ast::StmtIf) { parenthesized_range( body_target.into(), body.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents() ) .unwrap_or(body_target.range()) diff --git a/crates/ruff_linter/src/rules/pylint/rules/nested_min_max.rs b/crates/ruff_linter/src/rules/pylint/rules/nested_min_max.rs index d3d5d9e182ced..a78d4de6771fb 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/nested_min_max.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/nested_min_max.rs @@ -156,7 +156,7 @@ pub(crate) fn nested_min_max( }) { let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range()); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(expr, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/pylint/rules/subprocess_run_without_check.rs b/crates/ruff_linter/src/rules/pylint/rules/subprocess_run_without_check.rs index c6ff569fcbd06..03690dd350561 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/subprocess_run_without_check.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/subprocess_run_without_check.rs @@ -76,7 +76,7 @@ pub(crate) fn subprocess_run_without_check(checker: &mut Checker, call: &ast::Ex add_argument( "check=False", &call.arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ), // If the function call contains `**kwargs`, mark the fix as unsafe. 
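
// A minimal sketch of consuming `&CommentRanges` directly, as the rules above
// now do, assuming the `comments_in_range` API shown in this patch;
// `comment_texts` is a hypothetical helper, not part of the codebase. Rules
// that only need comment positions can take `&CommentRanges` instead of a
// whole `Indexer`.
use ruff_python_trivia::CommentRanges;
use ruff_source_file::Locator;
use ruff_text_size::TextRange;

fn comment_texts(
    comment_ranges: &CommentRanges,
    locator: &Locator,
    range: TextRange,
) -> Vec<String> {
    comment_ranges
        .comments_in_range(range)
        .iter()
        .map(|comment| locator.slice(*comment).to_string())
        .collect()
}
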
diff --git a/crates/ruff_linter/src/rules/pylint/rules/too_many_branches.rs b/crates/ruff_linter/src/rules/pylint/rules/too_many_branches.rs index 409c85f109b2c..e2b76a4101ff8 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/too_many_branches.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/too_many_branches.rs @@ -254,13 +254,13 @@ pub(crate) fn too_many_branches( #[cfg(test)] mod tests { use anyhow::Result; - use ruff_python_parser::parse_suite; + use ruff_python_parser::parse_module; use super::num_branches; fn test_helper(source: &str, expected_num_branches: usize) -> Result<()> { - let branches = parse_suite(source)?; - assert_eq!(num_branches(&branches), expected_num_branches); + let parsed = parse_module(source)?; + assert_eq!(num_branches(parsed.suite()), expected_num_branches); Ok(()) } diff --git a/crates/ruff_linter/src/rules/pylint/rules/too_many_return_statements.rs b/crates/ruff_linter/src/rules/pylint/rules/too_many_return_statements.rs index 39f573bb11fe7..5e6e34dba86b0 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/too_many_return_statements.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/too_many_return_statements.rs @@ -98,13 +98,13 @@ pub(crate) fn too_many_return_statements( #[cfg(test)] mod tests { use anyhow::Result; - use ruff_python_parser::parse_suite; + use ruff_python_parser::parse_module; use super::num_returns; fn test_helper(source: &str, expected: usize) -> Result<()> { - let stmts = parse_suite(source)?; - assert_eq!(num_returns(&stmts), expected); + let parsed = parse_module(source)?; + assert_eq!(num_returns(parsed.suite()), expected); Ok(()) } diff --git a/crates/ruff_linter/src/rules/pylint/rules/too_many_statements.rs b/crates/ruff_linter/src/rules/pylint/rules/too_many_statements.rs index d2dbf632ac67f..3ab6f9fb15d1b 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/too_many_statements.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/too_many_statements.rs @@ -158,10 +158,16 @@ pub(crate) fn too_many_statements( #[cfg(test)] mod tests { use anyhow::Result; - use ruff_python_parser::parse_suite; + + use ruff_python_ast::Suite; + use ruff_python_parser::parse_module; use super::num_statements; + fn parse_suite(source: &str) -> Result { + Ok(parse_module(source)?.into_suite()) + } + #[test] fn pass() -> Result<()> { let source: &str = r" diff --git a/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs index c5f6b9370343c..8fc8c9692a99b 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs @@ -175,7 +175,7 @@ fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix { })) ), &call.arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), )) } @@ -190,7 +190,7 @@ fn generate_import_fix(checker: &Checker, call: &ast::ExprCall) -> Result { let argument_edit = add_argument( &format!("encoding={binding}(False)"), &call.arguments, - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ); Ok(Fix::unsafe_edits(import_edit, [argument_edit])) diff --git a/crates/ruff_linter/src/rules/pyupgrade/fixes.rs b/crates/ruff_linter/src/rules/pyupgrade/fixes.rs index 7f259e2f9a30f..65486abee28e7 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/fixes.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/fixes.rs @@ -1,52 +1,49 @@ -use 
ruff_python_parser::{lexer, Mode, Tok}; +use ruff_python_ast::StmtImportFrom; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; -use ruff_text_size::{TextRange, TextSize}; +use ruff_text_size::{Ranged, TextRange}; /// Remove any imports matching `members` from an import-from statement. -pub(crate) fn remove_import_members(contents: &str, members: &[&str]) -> String { - let mut names: Vec = vec![]; - let mut commas: Vec = vec![]; - let mut removal_indices: Vec = vec![]; - - // Find all Tok::Name tokens that are not preceded by Tok::As, and all - // Tok::Comma tokens. - let mut prev_tok = None; - for (tok, range) in lexer::lex(contents, Mode::Module) - .flatten() - .skip_while(|(tok, _)| !matches!(tok, Tok::Import)) - { - if let Tok::Name { name } = &tok { - if matches!(prev_tok, Some(Tok::As)) { - // Adjust the location to take the alias into account. - let last_range = names.last_mut().unwrap(); - *last_range = TextRange::new(last_range.start(), range.end()); +pub(crate) fn remove_import_members( + locator: &Locator<'_>, + import_from_stmt: &StmtImportFrom, + tokens: &Tokens, + members_to_remove: &[&str], +) -> String { + let commas: Vec = tokens + .in_range(import_from_stmt.range()) + .iter() + .skip_while(|token| token.kind() != TokenKind::Import) + .filter_map(|token| { + if token.kind() == TokenKind::Comma { + Some(token.range()) } else { - if members.contains(&&**name) { - removal_indices.push(names.len()); - } - names.push(range); + None } - } else if matches!(tok, Tok::Comma) { - commas.push(range); - } - prev_tok = Some(tok); - } + }) + .collect(); // Reconstruct the source code by skipping any names that are in `members`. - let locator = Locator::new(contents); - let mut output = String::with_capacity(contents.len()); - let mut last_pos = TextSize::default(); + let mut output = String::with_capacity(import_from_stmt.range().len().to_usize()); + let mut last_pos = import_from_stmt.start(); let mut is_first = true; - for index in 0..names.len() { - if !removal_indices.contains(&index) { + + for (index, member) in import_from_stmt.names.iter().enumerate() { + if !members_to_remove.contains(&member.name.as_str()) { is_first = false; continue; } let range = if is_first { - TextRange::new(names[index].start(), names[index + 1].start()) + TextRange::new( + import_from_stmt.names[index].start(), + import_from_stmt.names[index + 1].start(), + ) } else { - TextRange::new(commas[index - 1].start(), names[index].end()) + TextRange::new( + commas[index - 1].start(), + import_from_stmt.names[index].end(), + ) }; // Add all contents from `last_pos` to `fix.location`. @@ -61,20 +58,39 @@ pub(crate) fn remove_import_members(contents: &str, members: &[&str]) -> String } // Add the remaining content. 
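
// A minimal, dependency-light sketch of the splice-by-ranges reconstruction
// used in `remove_import_members` above; `remove_spans` is a hypothetical
// helper, not part of the codebase. It walks the source once, copying every
// byte that is not covered by a span marked for deletion, and assumes the
// spans are non-overlapping and lie on character boundaries (token ranges
// always do).
use ruff_text_size::{TextRange, TextSize};

fn remove_spans(source: &str, mut spans: Vec<TextRange>) -> String {
    spans.sort_by_key(|span| span.start());
    let mut output = String::with_capacity(source.len());
    let mut last_pos = TextSize::default();
    for span in spans {
        output.push_str(&source[last_pos.to_usize()..span.start().to_usize()]);
        last_pos = span.end();
    }
    output.push_str(&source[last_pos.to_usize()..]);
    output
}
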
- let slice = locator.after(last_pos); + let slice = locator.slice(TextRange::new(last_pos, import_from_stmt.end())); output.push_str(slice); output } #[cfg(test)] mod tests { - use crate::rules::pyupgrade::fixes::remove_import_members; + use ruff_python_parser::parse_module; + use ruff_source_file::Locator; + + use super::remove_import_members; + + fn test_helper(source: &str, members_to_remove: &[&str]) -> String { + let parsed = parse_module(source).unwrap(); + let import_from_stmt = parsed + .suite() + .first() + .expect("source should have one statement") + .as_import_from_stmt() + .expect("first statement should be an import from statement"); + remove_import_members( + &Locator::new(source), + import_from_stmt, + parsed.tokens(), + members_to_remove, + ) + } #[test] fn once() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import bar, baz, qux as q"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -82,7 +98,7 @@ mod tests { fn twice() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import bar, qux as q"; - let actual = remove_import_members(source, &["baz", "bop"]); + let actual = test_helper(source, &["baz", "bop"]); assert_eq!(expected, actual); } @@ -90,7 +106,7 @@ mod tests { fn aliased() { let source = r"from foo import bar, baz, bop as boop, qux as q"; let expected = r"from foo import bar, baz, qux as q"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -98,7 +114,7 @@ mod tests { fn parenthesized() { let source = r"from foo import (bar, baz, bop, qux as q)"; let expected = r"from foo import (bar, baz, qux as q)"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -106,7 +122,7 @@ mod tests { fn last_import() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import bar, baz, bop"; - let actual = remove_import_members(source, &["qux"]); + let actual = test_helper(source, &["qux"]); assert_eq!(expected, actual); } @@ -114,7 +130,7 @@ mod tests { fn first_import() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import baz, bop, qux as q"; - let actual = remove_import_members(source, &["bar"]); + let actual = test_helper(source, &["bar"]); assert_eq!(expected, actual); } @@ -122,7 +138,7 @@ mod tests { fn first_two_imports() { let source = r"from foo import bar, baz, bop, qux as q"; let expected = r"from foo import bop, qux as q"; - let actual = remove_import_members(source, &["bar", "baz"]); + let actual = test_helper(source, &["bar", "baz"]); assert_eq!(expected, actual); } @@ -138,7 +154,7 @@ mod tests { bop, qux as q )"; - let actual = remove_import_members(source, &["bar", "baz"]); + let actual = test_helper(source, &["bar", "baz"]); assert_eq!(expected, actual); } @@ -155,7 +171,7 @@ mod tests { baz, qux as q, )"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -171,7 +187,7 @@ mod tests { bar, qux as q, )"; - let actual = remove_import_members(source, &["baz", "bop"]); + let actual = test_helper(source, &["baz", "bop"]); assert_eq!(expected, actual); } @@ -191,7 +207,7 @@ mod tests { # This comment should be retained. 
qux as q, )"; - let actual = remove_import_members(source, &["bop"]); + let actual = test_helper(source, &["bop"]); assert_eq!(expected, actual); } @@ -211,7 +227,7 @@ mod tests { bop, qux as q, )"; - let actual = remove_import_members(source, &["bar"]); + let actual = test_helper(source, &["bar"]); assert_eq!(expected, actual); } } diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs index 7777e13a4b957..f34b688febb01 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs @@ -1,10 +1,11 @@ use itertools::Itertools; -use ruff_python_ast::{Alias, Stmt}; +use ruff_python_ast::{Alias, StmtImportFrom}; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::whitespace::indentation; use ruff_python_codegen::Stylist; +use ruff_python_parser::Tokens; use ruff_source_file::Locator; use ruff_text_size::Ranged; @@ -398,29 +399,29 @@ const TYPING_EXTENSIONS_TO_TYPES_313: &[&str] = &["CapsuleType"]; const TYPING_EXTENSIONS_TO_WARNINGS_313: &[&str] = &["deprecated"]; struct ImportReplacer<'a> { - stmt: &'a Stmt, + import_from_stmt: &'a StmtImportFrom, module: &'a str, - members: &'a [Alias], locator: &'a Locator<'a>, stylist: &'a Stylist<'a>, + tokens: &'a Tokens, version: PythonVersion, } impl<'a> ImportReplacer<'a> { const fn new( - stmt: &'a Stmt, + import_from_stmt: &'a StmtImportFrom, module: &'a str, - members: &'a [Alias], locator: &'a Locator<'a>, stylist: &'a Stylist<'a>, + tokens: &'a Tokens, version: PythonVersion, ) -> Self { Self { - stmt, + import_from_stmt, module, - members, locator, stylist, + tokens, version, } } @@ -430,7 +431,7 @@ impl<'a> ImportReplacer<'a> { let mut operations = vec![]; if self.module == "typing" { if self.version >= PythonVersion::Py39 { - for member in self.members { + for member in &self.import_from_stmt.names { if let Some(target) = TYPING_TO_RENAME_PY39.iter().find_map(|(name, target)| { if &member.name == *name { Some(*target) @@ -616,7 +617,7 @@ impl<'a> ImportReplacer<'a> { let fix = Some(matched); Some((operation, fix)) } else { - let indentation = indentation(self.locator, self.stmt); + let indentation = indentation(self.locator, self.import_from_stmt); // If we have matched _and_ unmatched names, but the import is not on its own // line, we can't add a statement after it. 
For example, if we have @@ -636,7 +637,9 @@ impl<'a> ImportReplacer<'a> { let matched = ImportReplacer::format_import_from(&matched_names, target); let unmatched = fixes::remove_import_members( - self.locator.slice(self.stmt.range()), + self.locator, + self.import_from_stmt, + self.tokens, &matched_names .iter() .map(|name| name.name.as_str()) @@ -664,7 +667,7 @@ impl<'a> ImportReplacer<'a> { fn partition_imports(&self, candidates: &[&str]) -> (Vec<&Alias>, Vec<&Alias>) { let mut matched_names = vec![]; let mut unmatched_names = vec![]; - for name in self.members { + for name in &self.import_from_stmt.names { if candidates.contains(&name.name.as_str()) { matched_names.push(name); } else { @@ -691,21 +694,19 @@ impl<'a> ImportReplacer<'a> { } /// UP035 -pub(crate) fn deprecated_import( - checker: &mut Checker, - stmt: &Stmt, - names: &[Alias], - module: Option<&str>, - level: u32, -) { +pub(crate) fn deprecated_import(checker: &mut Checker, import_from_stmt: &StmtImportFrom) { // Avoid relative and star imports. - if level > 0 { + if import_from_stmt.level > 0 { return; } - if names.first().is_some_and(|name| &name.name == "*") { + if import_from_stmt + .names + .first() + .is_some_and(|name| &name.name == "*") + { return; } - let Some(module) = module else { + let Some(module) = import_from_stmt.module.as_deref() else { return; }; @@ -713,13 +714,12 @@ pub(crate) fn deprecated_import( return; } - let members: Vec<Alias> = names.iter().map(Clone::clone).collect(); let fixer = ImportReplacer::new( - stmt, + import_from_stmt, module, - &members, checker.locator(), checker.stylist(), + checker.parsed().tokens(), checker.settings.target_version, ); @@ -728,12 +728,12 @@ pub(crate) fn deprecated_import( DeprecatedImport { deprecation: Deprecation::WithoutRename(operation), }, - stmt.range(), + import_from_stmt.range(), ); if let Some(content) = fix { diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( content, - stmt.range(), + import_from_stmt.range(), ))); } checker.diagnostics.push(diagnostic); @@ -744,7 +744,7 @@ pub(crate) fn deprecated_import( DeprecatedImport { deprecation: Deprecation::WithRename(operation), }, - stmt.range(), + import_from_stmt.range(), ); checker.diagnostics.push(diagnostic); } diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs index 499f30324eb15..bc75dbe6a7168 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs @@ -1,5 +1,7 @@ -use ruff_python_parser::{TokenKind, TokenKindIter}; -use ruff_text_size::TextRange; +use std::slice::Iter; + +use ruff_python_parser::{Token, TokenKind, Tokens}; +use ruff_text_size::{Ranged, TextRange}; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; @@ -36,17 +38,17 @@ impl AlwaysFixableViolation for ExtraneousParentheses { } // See: https://github.com/asottile/pyupgrade/blob/97ed6fb3cf2e650d4f762ba231c3f04c41797710/pyupgrade/_main.py#L148 -fn match_extraneous_parentheses(tokens: &mut TokenKindIter) -> Option<(TextRange, TextRange)> { +fn match_extraneous_parentheses(tokens: &mut Iter<'_, Token>) -> Option<(TextRange, TextRange)> { // Store the location of the extraneous opening parenthesis.
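match_extraneous_parentheses now takes a plain slice iterator over tokens, so the caller can hand it a resumable cursor. A hedged sketch of the trivia-skipping step the matcher's loops perform below (the helper name is illustrative; Token and TokenKind are the types imported above):

use std::slice::Iter;

use ruff_python_parser::{Token, TokenKind};

// Advance the cursor past comments and non-logical newlines, returning
// the next significant token, as the matcher's loops do below.
fn next_significant<'a>(tokens: &mut Iter<'a, Token>) -> Option<&'a Token> {
    tokens.find(|token| {
        !matches!(
            token.kind(),
            TokenKind::Comment | TokenKind::NonLogicalNewline
        )
    })
}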
let start_range = loop { - let (token, range) = tokens.next()?; + let token = tokens.next()?; - match token { + match token.kind() { TokenKind::Comment | TokenKind::NonLogicalNewline => { continue; } TokenKind::Lpar => { - break range; + break token.range(); } _ => { return None; @@ -62,22 +64,28 @@ // Store the location of the extraneous closing parenthesis. let end_range = loop { - let (token, range) = tokens.next()?; - - // If we find a comma or a yield at depth 1 or 2, it's a tuple or coroutine. - if depth == 1 && matches!(token, TokenKind::Comma | TokenKind::Yield) { - return None; - } else if matches!(token, TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb) { - depth = depth.saturating_add(1); - } else if matches!(token, TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb) { - depth = depth.saturating_sub(1); + let token = tokens.next()?; + + match token.kind() { + // If we find a comma or a yield at depth 1 or 2, it's a tuple or coroutine. + TokenKind::Comma | TokenKind::Yield if depth == 1 => return None, + TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => { + depth = depth.saturating_add(1); + } + TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => { + depth = depth.saturating_sub(1); + } + _ => {} } if depth == 0 { - break range; + break token.range(); } - if !matches!(token, TokenKind::Comment | TokenKind::NonLogicalNewline) { + if !matches!( + token.kind(), + TokenKind::Comment | TokenKind::NonLogicalNewline + ) { empty_tuple = false; } }; @@ -88,9 +96,9 @@ // Find the next non-coding token. let token = loop { - let (token, _) = tokens.next()?; + let token = tokens.next()?; - match token { + match token.kind() { TokenKind::Comment | TokenKind::NonLogicalNewline => continue, _ => { break token; @@ -98,7 +106,7 @@ } }; - if matches!(token, TokenKind::Rpar) { + if matches!(token.kind(), TokenKind::Rpar) { Some((start_range, end_range)) } else { None @@ -108,15 +116,16 @@ /// UP034 pub(crate) fn extraneous_parentheses( diagnostics: &mut Vec<Diagnostic>, - mut tokens: TokenKindIter, + tokens: &Tokens, locator: &Locator, ) { - while let Some((token, _)) = tokens.next() { - if !matches!(token, TokenKind::Lpar) { + let mut token_iter = tokens.up_to_first_unknown().iter(); + while let Some(token) = token_iter.next() { + if !matches!(token.kind(), TokenKind::Lpar) { continue; } - let Some((start_range, end_range)) = match_extraneous_parentheses(&mut tokens) else { + let Some((start_range, end_range)) = match_extraneous_parentheses(&mut token_iter) else { continue; }; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs index d6441f9904490..930f2cd998459 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs @@ -11,7 +11,7 @@ use ruff_python_ast::{self as ast, Expr, Keyword}; use ruff_python_literal::format::{ FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate, }; -use ruff_python_parser::{lexer, Mode, Tok}; +use ruff_python_parser::TokenKind; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -409,15 +409,13 @@ pub(crate) fn f_strings(checker: &mut Checker,
call: &ast::ExprCall, summary: &F }; let mut patches: Vec<(TextRange, FStringConversion)> = vec![]; - let mut lex = lexer::lex_starts_at( - checker.locator().slice(call.func.range()), - Mode::Expression, - call.start(), - ) - .flatten(); + let mut tokens = checker.parsed().tokens().in_range(call.func.range()).iter(); let end = loop { - match lex.next() { - Some((Tok::Dot, range)) => { + let Some(token) = tokens.next() else { + unreachable!("Should break from the `Tok::Dot` arm"); + }; + match token.kind() { + TokenKind::Dot => { // ``` // ( // "a" @@ -429,10 +427,11 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F // // We know that the expression is a string literal, so we can safely assume that the // dot is the start of an attribute access. - break range.start(); + break token.start(); } - Some((Tok::String { .. }, range)) => { - match FStringConversion::try_convert(range, &mut summary, checker.locator()) { + TokenKind::String => { + match FStringConversion::try_convert(token.range(), &mut summary, checker.locator()) + { // If the format string contains side effects that would need to be repeated, // we can't convert it to an f-string. Ok(FStringConversion::SideEffects) => return, @@ -440,11 +439,10 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F // expression. Err(_) => return, // Otherwise, push the conversion to be processed later. - Ok(conversion) => patches.push((range, conversion)), + Ok(conversion) => patches.push((token.range(), conversion)), } } - Some(_) => continue, - None => unreachable!("Should break from the `Tok::Dot` arm"), + _ => {} } }; if patches.is_empty() { @@ -515,7 +513,7 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F // ) // ``` let has_comments = checker - .indexer() + .parsed() .comment_ranges() .intersects(call.arguments.range()); diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs index 03b33011be1b5..046ef14a6b312 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs @@ -8,7 +8,7 @@ use ruff_python_codegen::Stylist; use ruff_python_literal::cformat::{ CConversionFlags, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatString, }; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_parser::TokenKind; use ruff_python_stdlib::identifiers::is_identifier; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -344,38 +344,22 @@ fn convertible(format_string: &CFormatString, params: &Expr) -> bool { } /// UP031 -pub(crate) fn printf_string_formatting(checker: &mut Checker, expr: &Expr, right: &Expr) { - // Grab each string segment (in case there's an implicit concatenation). - let mut strings: Vec<(TextRange, AnyStringFlags)> = vec![]; - let mut extension = None; - for (tok, range) in lexer::lex_starts_at( - checker.locator().slice(expr), - checker.source_type.as_mode(), - expr.start(), - ) - .flatten() - { - match tok { - Tok::String { flags, .. } => strings.push((range, flags)), - // If we hit a right paren, we have to preserve it. - Tok::Rpar => extension = Some(range), - // Break as soon as we find the modulo symbol. - Tok::Percent => break, - _ => continue, - } - } - - // If there are no string segments, abort. 
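The UP031 rewrite below no longer lexes the expression to find implicitly concatenated string segments; the ExprStringLiteral node already exposes each part. A hedged sketch of iterating those parts (assuming the ast accessors used in the hunk that follows; the helper is illustrative):

use ruff_python_ast as ast;
use ruff_text_size::Ranged;

// Sum the source length of every segment of a (possibly implicitly
// concatenated) string literal, as the rewritten UP031 loop walks them.
fn total_segment_len(string_expr: &ast::ExprStringLiteral) -> usize {
    string_expr
        .value
        .iter()
        .map(|literal| literal.range().len().to_usize())
        .sum()
}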
- if strings.is_empty() { - return; - } +pub(crate) fn printf_string_formatting( + checker: &mut Checker, + bin_op: &ast::ExprBinOp, + string_expr: &ast::ExprStringLiteral, +) { + let right = &*bin_op.right; - // Parse each string segment. let mut num_positional_arguments = 0; let mut num_keyword_arguments = 0; - let mut format_strings = Vec::with_capacity(strings.len()); - for (range, flags) in &strings { - let string = checker.locator().slice(*range); + let mut format_strings: Vec<(TextRange, String)> = + Vec::with_capacity(string_expr.value.as_slice().len()); + + // Parse each string segment. + for string_literal in &string_expr.value { + let string = checker.locator().slice(string_literal); + let flags = AnyStringFlags::from(string_literal.flags); let string = &string [usize::from(flags.opener_len())..(string.len() - usize::from(flags.closer_len()))]; @@ -400,7 +384,10 @@ pub(crate) fn printf_string_formatting(checker: &mut Checker, expr: &Expr, right } // Convert the `%`-format string to a `.format` string. - format_strings.push(flags.format_string_contents(&percent_to_format(&format_string))); + format_strings.push(( + string_literal.range(), + flags.format_string_contents(&percent_to_format(&format_string)), + )); } // Parse the parameters. @@ -448,41 +435,55 @@ pub(crate) fn printf_string_formatting(checker: &mut Checker, expr: &Expr, right // Reconstruct the string. let mut contents = String::new(); - let mut prev = None; - for ((range, _), format_string) in strings.iter().zip(format_strings) { + let mut prev_end = None; + for (range, format_string) in format_strings { // Add the content before the string segment. - match prev { + match prev_end { None => { contents.push_str( checker .locator() - .slice(TextRange::new(expr.start(), range.start())), + .slice(TextRange::new(bin_op.start(), range.start())), ); } - Some(prev) => { - contents.push_str(checker.locator().slice(TextRange::new(prev, range.start()))); + Some(prev_end) => { + contents.push_str( + checker + .locator() + .slice(TextRange::new(prev_end, range.start())), + ); } } // Add the string itself. contents.push_str(&format_string); - prev = Some(range.end()); + prev_end = Some(range.end()); } - if let Some(range) = extension { - contents.push_str( - checker - .locator() - .slice(TextRange::new(prev.unwrap(), range.end())), - ); + if let Some(prev_end) = prev_end { + for token in checker.parsed().tokens().after(prev_end) { + match token.kind() { + // If we hit a right paren, we have to preserve it. + TokenKind::Rpar => { + contents.push_str( + checker + .locator() + .slice(TextRange::new(prev_end, token.end())), + ); + } + // Break as soon as we find the modulo symbol. + TokenKind::Percent => break, + _ => {} + } + } } // Add the `.format` call. 
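Tokens::after, used just above to replace the old `extension` bookkeeping, yields every token from an offset to the end of the file; the loop keeps a trailing `)` and stops at the `%` operator. The same scan reduced to a standalone sketch (illustrative helper, under the Token/TokenKind API shown in this patch):

use ruff_python_parser::{Token, TokenKind};
use ruff_text_size::{Ranged, TextRange, TextSize};

// Extend `start..` over a trailing `)` if one appears before the `%`
// operator, mirroring the trailing-token scan above.
fn extend_over_rpar(tokens: &[Token], start: TextSize) -> Option<TextRange> {
    for token in tokens {
        match token.kind() {
            TokenKind::Rpar => return Some(TextRange::new(start, token.end())),
            TokenKind::Percent => break,
            _ => {}
        }
    }
    None
}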
contents.push_str(&format!(".format{params_string}")); - let mut diagnostic = Diagnostic::new(PrintfStringFormatting, expr.range()); + let mut diagnostic = Diagnostic::new(PrintfStringFormatting, bin_op.range()); diagnostic.set_fix(Fix::unsafe_edit(Edit::range_replacement( contents, - expr.range(), + bin_op.range(), ))); checker.diagnostics.push(diagnostic); } diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs index 6cbd36e233c74..d502107007894 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs @@ -4,9 +4,8 @@ use anyhow::{anyhow, Result}; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{self as ast, Expr, PySourceType}; -use ruff_python_parser::{lexer, AsMode}; -use ruff_source_file::Locator; +use ruff_python_ast::{self as ast, Expr}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_text_size::{Ranged, TextSize}; use crate::checkers::ast::Checker; @@ -76,12 +75,11 @@ pub(crate) fn redundant_open_modes(checker: &mut Checker, call: &ast::ExprCall) }) = &keyword.value { if let Ok(mode) = OpenMode::from_str(mode_param_value.to_str()) { - checker.diagnostics.push(create_check( + checker.diagnostics.push(create_diagnostic( call, &keyword.value, mode.replacement_value(), - checker.locator(), - checker.source_type, + checker.parsed().tokens(), )); } } @@ -91,12 +89,11 @@ pub(crate) fn redundant_open_modes(checker: &mut Checker, call: &ast::ExprCall) Some(mode_param) => { if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = &mode_param { if let Ok(mode) = OpenMode::from_str(value.to_str()) { - checker.diagnostics.push(create_check( + checker.diagnostics.push(create_diagnostic( call, mode_param, mode.replacement_value(), - checker.locator(), - checker.source_type, + checker.parsed().tokens(), )); } } @@ -146,18 +143,17 @@ impl OpenMode { } } -fn create_check<T: Ranged>( - expr: &T, +fn create_diagnostic( + call: &ast::ExprCall, mode_param: &Expr, replacement_value: Option<&str>, - locator: &Locator, - source_type: PySourceType, + tokens: &Tokens, ) -> Diagnostic { let mut diagnostic = Diagnostic::new( RedundantOpenModes { replacement: replacement_value.map(ToString::to_string), }, - expr.range(), + call.range(), ); if let Some(content) = replacement_value { @@ -166,52 +162,53 @@ fn create_check<T: Ranged>( mode_param.range(), ))); } else { - diagnostic.try_set_fix(|| { - create_remove_param_fix(locator, expr, mode_param, source_type).map(Fix::safe_edit) - }); + diagnostic + .try_set_fix(|| create_remove_param_fix(call, mode_param, tokens).map(Fix::safe_edit)); } diagnostic } -fn create_remove_param_fix<T: Ranged>( - locator: &Locator, - expr: &T, +fn create_remove_param_fix( + call: &ast::ExprCall, mode_param: &Expr, - source_type: PySourceType, + tokens: &Tokens, ) -> Result<Edit> { - let content = locator.slice(expr); // Find the last comma before mode_param and create a deletion fix // starting from the comma and ending after mode_param.
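Once the scan below has found both offsets, the fix itself is a plain deletion between them; everything else in create_remove_param_fix is locating those offsets in the token stream. The closing step as a standalone sketch (illustrative helper and error message; Edit::deletion as used below):

use anyhow::{anyhow, Result};
use ruff_diagnostics::Edit;
use ruff_text_size::TextSize;

// Build the deletion once the comma/argument boundaries are known,
// mirroring the `(fix_start, fix_end)` match at the end of the function.
fn deletion(fix_start: Option<TextSize>, fix_end: Option<TextSize>) -> Result<Edit> {
    match (fix_start, fix_end) {
        (Some(start), Some(end)) => Ok(Edit::deletion(start, end)),
        _ => Err(anyhow!("failed to locate deletion boundaries")),
    }
}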
let mut fix_start: Option<TextSize> = None; let mut fix_end: Option<TextSize> = None; let mut is_first_arg: bool = false; let mut delete_first_arg: bool = false; - for (tok, range) in lexer::lex_starts_at(content, source_type.as_mode(), expr.start()).flatten() - { - if range.start() == mode_param.start() { + + for token in tokens.in_range(call.range()) { + if token.start() == mode_param.start() { if is_first_arg { delete_first_arg = true; continue; } - fix_end = Some(range.end()); - break; - } - if delete_first_arg && tok.is_name() { - fix_end = Some(range.start()); + fix_end = Some(token.end()); break; } - if tok.is_lpar() { - is_first_arg = true; - fix_start = Some(range.end()); - } - if tok.is_comma() { - is_first_arg = false; - if !delete_first_arg { - fix_start = Some(range.start()); + match token.kind() { + TokenKind::Name if delete_first_arg => { + fix_end = Some(token.start()); + break; } + TokenKind::Lpar => { + is_first_arg = true; + fix_start = Some(token.end()); + } + TokenKind::Comma => { + is_first_arg = false; + if !delete_first_arg { + fix_start = Some(token.start()); + } + } + _ => {} } } + match (fix_start, fix_end) { (Some(start), Some(end)) => Ok(Edit::deletion(start, end)), _ => Err(anyhow::anyhow!( diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs index 46ed24c176ccd..68b0ee777f391 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs @@ -4,6 +4,7 @@ use regex::Regex; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -49,10 +50,11 @@ pub(crate) fn unnecessary_coding_comment( diagnostics: &mut Vec<Diagnostic>, locator: &Locator, indexer: &Indexer, + comment_ranges: &CommentRanges, ) { // The coding comment must be on one of the first two lines. Since each comment spans at least // one line, we only need to check the first two comments at most. - for comment_range in indexer.comment_ranges().iter().take(2) { + for comment_range in comment_ranges.iter().take(2) { // If leading content is not whitespace then it's not a valid coding comment e.g. // ``` // print(x) # coding=utf8 diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs index db894ed688df3..6ed669662897e 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs @@ -1,7 +1,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{self as ast, Arguments, Expr, Keyword, PySourceType}; -use ruff_python_parser::{lexer, AsMode, Tok}; +use ruff_python_ast::{self as ast, Arguments, Expr, Keyword}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -117,33 +117,26 @@ fn match_encoding_arg(arguments: &Arguments) -> Option<EncodingArg> { } /// Return a [`Fix`] replacing the call to encode with a byte string.
-fn replace_with_bytes_literal( - locator: &Locator, - call: &ast::ExprCall, - source_type: PySourceType, -) -> Fix { +fn replace_with_bytes_literal(locator: &Locator, call: &ast::ExprCall, tokens: &Tokens) -> Fix { // Build up a replacement string by prefixing all string tokens with `b`. - let contents = locator.slice(call); - let mut replacement = String::with_capacity(contents.len() + 1); + let mut replacement = String::with_capacity(call.range().len().to_usize() + 1); let mut prev = call.start(); - for (tok, range) in - lexer::lex_starts_at(contents, source_type.as_mode(), call.start()).flatten() - { - match tok { - Tok::Dot => break, - Tok::String { .. } => { - replacement.push_str(locator.slice(TextRange::new(prev, range.start()))); - let string = locator.slice(range); + for token in tokens.in_range(call.range()) { + match token.kind() { + TokenKind::Dot => break, + TokenKind::String => { + replacement.push_str(locator.slice(TextRange::new(prev, token.start()))); + let string = locator.slice(token); replacement.push_str(&format!( "b{}", &string.trim_start_matches('u').trim_start_matches('U') )); } _ => { - replacement.push_str(locator.slice(TextRange::new(prev, range.end()))); + replacement.push_str(locator.slice(TextRange::new(prev, token.end()))); } } - prev = range.end(); + prev = token.end(); } Fix::safe_edit(Edit::range_replacement( @@ -172,7 +165,7 @@ pub(crate) fn unnecessary_encode_utf8(checker: &mut Checker, call: &ast::ExprCal diagnostic.set_fix(replace_with_bytes_literal( checker.locator(), call, - checker.source_type, + checker.parsed().tokens(), )); checker.diagnostics.push(diagnostic); } else if let EncodingArg::Keyword(kwarg) = encoding_arg { diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/yield_in_for_loop.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/yield_in_for_loop.rs index d371eb96610aa..7e87c72d4ff5c 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/yield_in_for_loop.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/yield_in_for_loop.rs @@ -116,7 +116,7 @@ pub(crate) fn yield_in_for_loop(checker: &mut Checker, stmt_for: &ast::StmtFor) parenthesized_range( iter.as_ref().into(), stmt_for.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(iter.range()), diff --git a/crates/ruff_linter/src/rules/refurb/rules/if_exp_instead_of_or_operator.rs b/crates/ruff_linter/src/rules/refurb/rules/if_exp_instead_of_or_operator.rs index 131b8eb789f36..c3f404c2437f6 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/if_exp_instead_of_or_operator.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/if_exp_instead_of_or_operator.rs @@ -7,7 +7,7 @@ use ruff_python_ast::comparable::ComparableExpr; use ruff_python_ast::helpers::contains_effect; use ruff_python_ast::parenthesize::parenthesized_range; use ruff_python_ast::Expr; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::Ranged; @@ -74,8 +74,18 @@ pub(crate) fn if_exp_instead_of_or_operator(checker: &mut Checker, if_expr: &ast Edit::range_replacement( format!( "{} or {}", - parenthesize_test(test, if_expr, checker.indexer(), checker.locator()), - parenthesize_test(orelse, if_expr, checker.indexer(), checker.locator()), + parenthesize_test( + test, + if_expr, + checker.parsed().comment_ranges(), + checker.locator() + ), + parenthesize_test( + orelse, + if_expr, + checker.parsed().comment_ranges(), + checker.locator() + ), ), if_expr.range(), ), @@ -99,13 
+109,13 @@ pub(crate) fn if_exp_instead_of_or_operator(checker: &mut Checker, if_expr: &ast fn parenthesize_test<'a>( expr: &Expr, if_expr: &ast::ExprIf, - indexer: &Indexer, + comment_ranges: &CommentRanges, locator: &Locator<'a>, ) -> Cow<'a, str> { if let Some(range) = parenthesized_range( expr.into(), if_expr.into(), - indexer.comment_ranges(), + comment_ranges, locator.contents(), ) { Cow::Borrowed(locator.slice(range)) diff --git a/crates/ruff_linter/src/rules/refurb/rules/repeated_append.rs b/crates/ruff_linter/src/rules/refurb/rules/repeated_append.rs index 1eb6bbaf6ff2d..60893aa8e3988 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/repeated_append.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/repeated_append.rs @@ -114,7 +114,7 @@ pub(crate) fn repeated_append(checker: &mut Checker, stmt: &Stmt) { // # comment // a.append(2) // ``` - if group.is_consecutive && !checker.indexer().comment_ranges().intersects(group.range()) + if group.is_consecutive && !checker.parsed().comment_ranges().intersects(group.range()) { diagnostic.set_fix(Fix::unsafe_edit(Edit::replacement( replacement, diff --git a/crates/ruff_linter/src/rules/refurb/rules/single_item_membership_test.rs b/crates/ruff_linter/src/rules/refurb/rules/single_item_membership_test.rs index e635e1e2da2f7..97f9aea11648a 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/single_item_membership_test.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/single_item_membership_test.rs @@ -83,7 +83,7 @@ pub(crate) fn single_item_membership_test( &[membership_test.replacement_op()], &[item.clone()], expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator(), ), expr.range(), diff --git a/crates/ruff_linter/src/rules/ruff/rules/collection_literal_concatenation.rs b/crates/ruff_linter/src/rules/ruff/rules/collection_literal_concatenation.rs index 05f7f602db751..b4fe7df371f25 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/collection_literal_concatenation.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/collection_literal_concatenation.rs @@ -199,7 +199,7 @@ pub(crate) fn collection_literal_concatenation(checker: &mut Checker, expr: &Exp expr.range(), ); if !checker - .indexer() + .parsed() .comment_ranges() .has_comments(expr, checker.locator()) { diff --git a/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs b/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs index 8fe4215551b6f..2f132ca140ab0 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/invalid_formatter_suppression_comment.rs @@ -69,9 +69,9 @@ impl AlwaysFixableViolation for InvalidFormatterSuppressionComment { /// RUF028 pub(crate) fn ignored_formatter_suppression_comment(checker: &mut Checker, suite: &ast::Suite) { - let indexer = checker.indexer(); let locator = checker.locator(); - let comment_ranges: SmallVec<[SuppressionComment; 8]> = indexer + let comment_ranges: SmallVec<[SuppressionComment; 8]> = checker + .parsed() .comment_ranges() .into_iter() .filter_map(|range| { diff --git a/crates/ruff_linter/src/rules/ruff/rules/missing_fstring_syntax.rs b/crates/ruff_linter/src/rules/ruff/rules/missing_fstring_syntax.rs index 35976dbf5bf0c..95989f9721ce9 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/missing_fstring_syntax.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/missing_fstring_syntax.rs @@ -114,10 +114,12 @@ fn should_be_fstring( } let fstring_expr = 
format!("f{}", locator.slice(literal)); + let Ok(parsed) = parse_expression(&fstring_expr) else { + return false; + }; // Note: Range offsets for `value` are based on `fstring_expr` - let Ok(ast::Expr::FString(ast::ExprFString { value, .. })) = parse_expression(&fstring_expr) - else { + let Some(ast::ExprFString { value, .. }) = parsed.expr().as_f_string_expr() else { return false; }; diff --git a/crates/ruff_linter/src/rules/ruff/rules/parenthesize_logical_operators.rs b/crates/ruff_linter/src/rules/ruff/rules/parenthesize_logical_operators.rs index 04e140bf360bf..cdb7b9e6c1ce7 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/parenthesize_logical_operators.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/parenthesize_logical_operators.rs @@ -84,7 +84,7 @@ pub(crate) fn parenthesize_chained_logical_operators( if parenthesized_range( bool_op.into(), expr.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), locator.contents(), ) .is_none() diff --git a/crates/ruff_linter/src/rules/ruff/rules/quadratic_list_summation.rs b/crates/ruff_linter/src/rules/ruff/rules/quadratic_list_summation.rs index 77a4160ec2a63..2d8e684f0f1a2 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/quadratic_list_summation.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/quadratic_list_summation.rs @@ -111,7 +111,7 @@ fn convert_to_reduce(iterable: &Expr, call: &ast::ExprCall, checker: &Checker) - parenthesized_range( iterable.into(), call.arguments.as_any_node_ref(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(iterable.range()), diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index 5953ab55a1284..9f4ce6129cabc 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -8,7 +8,7 @@ use std::cmp::Ordering; use ruff_python_ast as ast; use ruff_python_codegen::Stylist; -use ruff_python_parser::{lexer, Mode, Tok, TokenKind}; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_stdlib::str::is_cased_uppercase; use ruff_python_trivia::{first_non_trivia_token, leading_indentation, SimpleTokenKind}; use ruff_source_file::Locator; @@ -336,6 +336,7 @@ impl<'a> MultilineStringSequenceValue<'a> { range: TextRange, kind: SequenceKind, locator: &Locator, + tokens: &Tokens, string_items: &[&'a str], ) -> Option> { // Parse the multiline string sequence using the raw tokens. @@ -344,7 +345,7 @@ impl<'a> MultilineStringSequenceValue<'a> { // // Step (1). Start by collecting information on each line individually: let (lines, ends_with_trailing_comma) = - collect_string_sequence_lines(range, kind, locator, string_items)?; + collect_string_sequence_lines(range, kind, tokens, string_items)?; // Step (2). Group lines together into sortable "items": // - Any "item" contains a single element of the list/tuple @@ -488,7 +489,7 @@ impl Ranged for MultilineStringSequenceValue<'_> { fn collect_string_sequence_lines<'a>( range: TextRange, kind: SequenceKind, - locator: &Locator, + tokens: &Tokens, string_items: &[&'a str], ) -> Option<(Vec>, bool)> { // These first two variables are used for keeping track of state @@ -501,39 +502,34 @@ fn collect_string_sequence_lines<'a>( // An iterator over the string values in the sequence. 
let mut string_items_iter = string_items.iter(); - // `lex_starts_at()` gives us absolute ranges rather than relative ranges, - // but (surprisingly) we still need to pass in the slice of code we want it to lex, - // rather than the whole source file: - let mut token_iter = - lexer::lex_starts_at(locator.slice(range), Mode::Expression, range.start()); - let (first_tok, _) = token_iter.next()?.ok()?; - if TokenKind::from(&first_tok) != kind.opening_token_for_multiline_definition() { + let mut token_iter = tokens.in_range(range).iter(); + let first_token = token_iter.next()?; + if first_token.kind() != kind.opening_token_for_multiline_definition() { return None; } let expected_final_token = kind.closing_token_for_multiline_definition(); - for pair in token_iter { - let (tok, subrange) = pair.ok()?; - match tok { - Tok::NonLogicalNewline => { + for token in token_iter { + match token.kind() { + TokenKind::NonLogicalNewline => { lines.push(line_state.into_string_sequence_line()); line_state = LineState::default(); } - Tok::Comment(_) => { - line_state.visit_comment_token(subrange); + TokenKind::Comment => { + line_state.visit_comment_token(token.range()); } - Tok::String { .. } => { + TokenKind::String => { let Some(string_value) = string_items_iter.next() else { unreachable!("Expected the number of string tokens to be equal to the number of string items in the sequence"); }; - line_state.visit_string_token(string_value, subrange); + line_state.visit_string_token(string_value, token.range()); ends_with_trailing_comma = false; } - Tok::Comma => { - line_state.visit_comma_token(subrange); + TokenKind::Comma => { + line_state.visit_comma_token(token.range()); ends_with_trailing_comma = true; } - tok if TokenKind::from(&tok) == expected_final_token => { + kind if kind == expected_final_token => { lines.push(line_state.into_string_sequence_line()); break; } diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index 2d88b64defb0e..0ac227e935794 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -216,6 +216,7 @@ fn create_fix( range, kind, locator, + checker.parsed().tokens(), string_items, )?; assert_eq!(value.len(), elts.len()); diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index 46adf10fb4bdf..55b12f2684911 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -210,6 +210,7 @@ impl<'a> StringLiteralDisplay<'a> { self.range(), *sequence_kind, locator, + checker.parsed().tokens(), elements, )?; assert_eq!(analyzed_sequence.len(), self.elts.len()); diff --git a/crates/ruff_linter/src/rules/ruff/rules/test_rules.rs b/crates/ruff_linter/src/rules/ruff/rules/test_rules.rs index d148dff835d95..b9e9cea7c0af6 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/test_rules.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/test_rules.rs @@ -15,15 +15,15 @@ /// will not converge. 
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_index::Indexer; +use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; use ruff_text_size::TextSize; use crate::registry::Rule; /// Check if a comment exists anywhere in the given file -fn comment_exists(text: &str, locator: &Locator, indexer: &Indexer) -> bool { - for range in indexer.comment_ranges() { +fn comment_exists(text: &str, locator: &Locator, comment_ranges: &CommentRanges) -> bool { + for range in comment_ranges { let comment_text = locator.slice(range); if text.trim_end() == comment_text { return true; @@ -49,7 +49,7 @@ pub(crate) const TEST_RULES: &[Rule] = &[ ]; pub(crate) trait TestRule { - fn diagnostic(locator: &Locator, indexer: &Indexer) -> Option<Diagnostic>; + fn diagnostic(locator: &Locator, comment_ranges: &CommentRanges) -> Option<Diagnostic>; } /// ## What it does @@ -80,7 +80,7 @@ impl Violation for StableTestRule { } impl TestRule for StableTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( StableTestRule, ruff_text_size::TextRange::default(), @@ -116,9 +116,9 @@ impl Violation for StableTestRuleSafeFix { } impl TestRule for StableTestRuleSafeFix { - fn diagnostic(locator: &Locator, indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(locator: &Locator, comment_ranges: &CommentRanges) -> Option<Diagnostic> { let comment = format!("# fix from stable-test-rule-safe-fix\n"); - if comment_exists(&comment, locator, indexer) { + if comment_exists(&comment, locator, comment_ranges) { None } else { Some( @@ -160,9 +160,9 @@ impl Violation for StableTestRuleUnsafeFix { } impl TestRule for StableTestRuleUnsafeFix { - fn diagnostic(locator: &Locator, indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(locator: &Locator, comment_ranges: &CommentRanges) -> Option<Diagnostic> { let comment = format!("# fix from stable-test-rule-unsafe-fix\n"); - if comment_exists(&comment, locator, indexer) { + if comment_exists(&comment, locator, comment_ranges) { None } else { Some( @@ -207,9 +207,9 @@ impl Violation for StableTestRuleDisplayOnlyFix { } impl TestRule for StableTestRuleDisplayOnlyFix { - fn diagnostic(locator: &Locator, indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(locator: &Locator, comment_ranges: &CommentRanges) -> Option<Diagnostic> { let comment = format!("# fix from stable-test-rule-display-only-fix\n"); - if comment_exists(&comment, locator, indexer) { + if comment_exists(&comment, locator, comment_ranges) { None } else { Some( @@ -254,7 +254,7 @@ impl Violation for PreviewTestRule { } impl TestRule for PreviewTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( PreviewTestRule, ruff_text_size::TextRange::default(), @@ -290,7 +290,7 @@ impl Violation for NurseryTestRule { } impl TestRule for NurseryTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( NurseryTestRule, ruff_text_size::TextRange::default(), @@ -326,7 +326,7 @@ impl Violation for DeprecatedTestRule { } impl TestRule for DeprecatedTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( DeprecatedTestRule,
ruff_text_size::TextRange::default(), @@ -362,7 +362,7 @@ impl Violation for AnotherDeprecatedTestRule { } impl TestRule for AnotherDeprecatedTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( AnotherDeprecatedTestRule, ruff_text_size::TextRange::default(), @@ -398,7 +398,7 @@ impl Violation for RemovedTestRule { } impl TestRule for RemovedTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( RemovedTestRule, ruff_text_size::TextRange::default(), @@ -434,7 +434,7 @@ impl Violation for AnotherRemovedTestRule { } impl TestRule for AnotherRemovedTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( AnotherRemovedTestRule, ruff_text_size::TextRange::default(), @@ -470,7 +470,7 @@ impl Violation for RedirectedFromTestRule { } impl TestRule for RedirectedFromTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( RedirectedFromTestRule, ruff_text_size::TextRange::default(), @@ -506,7 +506,7 @@ impl Violation for RedirectedToTestRule { } impl TestRule for RedirectedToTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( RedirectedToTestRule, ruff_text_size::TextRange::default(), @@ -542,7 +542,7 @@ impl Violation for RedirectedFromPrefixTestRule { } impl TestRule for RedirectedFromPrefixTestRule { - fn diagnostic(_locator: &Locator, _indexer: &Indexer) -> Option<Diagnostic> { + fn diagnostic(_locator: &Locator, _comment_ranges: &CommentRanges) -> Option<Diagnostic> { Some(Diagnostic::new( RedirectedFromPrefixTestRule, ruff_text_size::TextRange::default(), diff --git a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_key_check.rs b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_key_check.rs index 97724db2127ca..1813efd784ddf 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_key_check.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_key_check.rs @@ -110,7 +110,7 @@ pub(crate) fn unnecessary_key_check(checker: &mut Checker, expr: &Expr) { parenthesized_range( obj_right.into(), right.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(obj_right.range()) @@ -119,7 +119,7 @@ pub(crate) fn unnecessary_key_check(checker: &mut Checker, expr: &Expr) { parenthesized_range( key_right.into(), right.into(), - checker.indexer().comment_ranges(), + checker.parsed().comment_ranges(), checker.locator().contents(), ) .unwrap_or(key_right.range()) diff --git a/crates/ruff_linter/src/test.rs b/crates/ruff_linter/src/test.rs index b646d76bd191c..d23406bef00b0 100644 --- a/crates/ruff_linter/src/test.rs +++ b/crates/ruff_linter/src/test.rs @@ -16,14 +16,13 @@ use ruff_notebook::NotebookError; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::AsMode; use ruff_python_trivia::textwrap::dedent; use ruff_source_file::{Locator, SourceFileBuilder}; use ruff_text_size::Ranged; use crate::directives; use crate::fix::{fix_file, FixResult};
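The test-harness changes below follow the parse-once pipeline this whole patch converges on: a single parse_unchecked_source call, with the stylist, indexer, and directives all derived from the resulting parse. Condensed into a hedged sketch (function names as in the hunks that follow; error handling and the later check_path call elided):

use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_source_file::Locator;

// Parse once, then derive the per-file services from the parse result,
// mirroring `test_contents` below.
fn build_services(source: &str, source_type: PySourceType) {
    let parsed = ruff_python_parser::parse_unchecked_source(source, source_type);
    let locator = Locator::new(source);
    let stylist = Stylist::from_tokens(parsed.tokens(), &locator);
    let indexer = Indexer::from_tokens(parsed.tokens(), &locator);
    // `parsed`, `stylist`, and `indexer` are then handed to `check_path`.
    let _ = (stylist, indexer);
}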
-use crate::linter::{check_path, LinterResult, TokenSource}; +use crate::linter::{check_path, LinterResult}; use crate::message::{Emitter, EmitterContext, Message, TextEmitter}; use crate::packaging::detect_package_root; use crate::registry::AsRule; @@ -110,12 +109,12 @@ pub(crate) fn test_contents<'a>( settings: &LinterSettings, ) -> (Vec<Message>, Cow<'a, SourceKind>) { let source_type = PySourceType::from(path); - let tokens = ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode()); + let parsed = ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type); let locator = Locator::new(source_kind.source_code()); - let stylist = Stylist::from_tokens(&tokens, &locator); - let indexer = Indexer::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -135,7 +134,7 @@ pub(crate) fn test_contents<'a>( flags::Noqa::Enabled, source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); let source_has_errors = error.is_some(); @@ -175,13 +174,13 @@ pub(crate) fn test_contents<'a>( transformed = Cow::Owned(transformed.updated(fixed_contents, &source_map)); - let tokens = - ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode()); + let parsed = + ruff_python_parser::parse_unchecked_source(transformed.source_code(), source_type); let locator = Locator::new(transformed.source_code()); - let stylist = Stylist::from_tokens(&tokens, &locator); - let indexer = Indexer::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); let directives = directives::extract_directives( - &tokens, + &parsed, directives::Flags::from_settings(settings), &locator, &indexer, @@ -201,7 +200,7 @@ pub(crate) fn test_contents<'a>( flags::Noqa::Enabled, &transformed, source_type, - TokenSource::Tokens(tokens), + &parsed, ); if let Some(fixed_error) = fixed_error { diff --git a/crates/ruff_python_ast/src/str_prefix.rs b/crates/ruff_python_ast/src/str_prefix.rs index e6784d2604840..b2da865d1772f 100644 --- a/crates/ruff_python_ast/src/str_prefix.rs +++ b/crates/ruff_python_ast/src/str_prefix.rs @@ -150,45 +150,6 @@ impl AnyStringPrefix { } } -impl TryFrom<char> for AnyStringPrefix { - type Error = String; - - fn try_from(value: char) -> Result<Self, Self::Error> { - let result = match value { - 'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }), - 'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }), - 'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode), - 'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular), - 'f' | 'F' => Self::Format(FStringPrefix::Regular), - _ => return Err(format!("Unexpected prefix '{value}'")), - }; - Ok(result) - } -} - -impl TryFrom<[char; 2]> for AnyStringPrefix { - type Error = String; - - fn try_from(value: [char; 2]) -> Result<Self, Self::Error> { - let result = match value { - ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => { - Self::Format(FStringPrefix::Raw { uppercase_r: false }) - } - ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => { - Self::Format(FStringPrefix::Raw { uppercase_r: true }) - } - ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => { - Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) - } - ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => { - Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) - } - _ =>
return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])), - }; - Ok(result) - } -} - impl fmt::Display for AnyStringPrefix { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.as_str()) diff --git a/crates/ruff_python_ast_integration_tests/tests/identifier.rs b/crates/ruff_python_ast_integration_tests/tests/identifier.rs index 1e70c4fd65f50..324390b8454c9 100644 --- a/crates/ruff_python_ast_integration_tests/tests/identifier.rs +++ b/crates/ruff_python_ast_integration_tests/tests/identifier.rs @@ -1,5 +1,5 @@ use ruff_python_ast::identifier; -use ruff_python_parser::{parse_suite, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_text_size::{TextRange, TextSize}; #[test] @@ -11,7 +11,7 @@ else: pass " .trim(); - let stmts = parse_suite(contents)?; + let stmts = parse_module(contents)?.into_suite(); let stmt = stmts.first().unwrap(); let range = identifier::else_(stmt, contents).unwrap(); assert_eq!(&contents[range], "else"); diff --git a/crates/ruff_python_ast_integration_tests/tests/parenthesize.rs b/crates/ruff_python_ast_integration_tests/tests/parenthesize.rs index 6e6b2eeaaa07e..ec6b5d8650935 100644 --- a/crates/ruff_python_ast_integration_tests/tests/parenthesize.rs +++ b/crates/ruff_python_ast_integration_tests/tests/parenthesize.rs @@ -6,9 +6,9 @@ use ruff_text_size::TextRange; #[test] fn test_parenthesized_name() { let source_code = r"(x) + 1"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let bin_op = expr.as_bin_op_expr().unwrap(); + let bin_op = parsed.expr().as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); let parenthesized = parenthesized_range( @@ -23,9 +23,9 @@ fn test_parenthesized_name() { #[test] fn test_non_parenthesized_name() { let source_code = r"x + 1"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let bin_op = expr.as_bin_op_expr().unwrap(); + let bin_op = parsed.expr().as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); let parenthesized = parenthesized_range( @@ -40,9 +40,9 @@ fn test_non_parenthesized_name() { #[test] fn test_parenthesized_argument() { let source_code = r"f((a))"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let call = expr.as_call_expr().unwrap(); + let call = parsed.expr().as_call_expr().unwrap(); let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); @@ -58,9 +58,9 @@ fn test_parenthesized_argument() { #[test] fn test_non_parenthesized_argument() { let source_code = r"f(a)"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let call = expr.as_call_expr().unwrap(); + let call = parsed.expr().as_call_expr().unwrap(); let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); @@ -76,9 +76,9 @@ fn test_non_parenthesized_argument() { #[test] fn test_parenthesized_tuple_member() { let source_code = r"(a, (b))"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let tuple = expr.as_tuple_expr().unwrap(); + let tuple = parsed.expr().as_tuple_expr().unwrap(); let member = tuple.elts.last().unwrap(); let parenthesized = parenthesized_range( @@ -93,9 +93,9 @@ fn test_parenthesized_tuple_member() { #[test] fn test_non_parenthesized_tuple_member() { let source_code = r"(a, b)"; - let expr = 
parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let tuple = expr.as_tuple_expr().unwrap(); + let tuple = parsed.expr().as_tuple_expr().unwrap(); let member = tuple.elts.last().unwrap(); let parenthesized = parenthesized_range( @@ -110,9 +110,9 @@ fn test_non_parenthesized_tuple_member() { #[test] fn test_twice_parenthesized_name() { let source_code = r"((x)) + 1"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let bin_op = expr.as_bin_op_expr().unwrap(); + let bin_op = parsed.expr().as_bin_op_expr().unwrap(); let name = bin_op.left.as_ref(); let parenthesized = parenthesized_range( @@ -127,9 +127,9 @@ fn test_twice_parenthesized_name() { #[test] fn test_twice_parenthesized_argument() { let source_code = r"f(((a + 1)))"; - let expr = parse_expression(source_code).unwrap(); + let parsed = parse_expression(source_code).unwrap(); - let call = expr.as_call_expr().unwrap(); + let call = parsed.expr().as_call_expr().unwrap(); let arguments = &call.arguments; let argument = arguments.args.first().unwrap(); diff --git a/crates/ruff_python_ast_integration_tests/tests/preorder.rs b/crates/ruff_python_ast_integration_tests/tests/preorder.rs index 21a159b424a3a..8c375da3e0ed2 100644 --- a/crates/ruff_python_ast_integration_tests/tests/preorder.rs +++ b/crates/ruff_python_ast_integration_tests/tests/preorder.rs @@ -4,8 +4,7 @@ use insta::assert_snapshot; use ruff_python_ast::visitor::preorder::{PreorderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, BoolOp, CmpOp, Operator, Singleton, UnaryOp}; -use ruff_python_parser::lexer::lex; -use ruff_python_parser::{parse_tokens, Mode}; +use ruff_python_parser::{parse, Mode}; #[test] fn function_arguments() { @@ -148,11 +147,10 @@ fn f_strings() { } fn trace_preorder_visitation(source: &str) -> String { - let tokens = lex(source, Mode::Module); - let parsed = parse_tokens(tokens.collect(), source, Mode::Module).unwrap(); + let parsed = parse(source, Mode::Module).unwrap(); let mut visitor = RecordVisitor::default(); - visitor.visit_mod(&parsed); + visitor.visit_mod(parsed.syntax()); visitor.output } diff --git a/crates/ruff_python_ast_integration_tests/tests/stmt_if.rs b/crates/ruff_python_ast_integration_tests/tests/stmt_if.rs index cacf964996f5b..240d01187efc8 100644 --- a/crates/ruff_python_ast_integration_tests/tests/stmt_if.rs +++ b/crates/ruff_python_ast_integration_tests/tests/stmt_if.rs @@ -1,5 +1,5 @@ use ruff_python_ast::stmt_if::elif_else_range; -use ruff_python_parser::{parse_suite, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_text_size::TextSize; #[test] @@ -9,12 +9,14 @@ fn extract_elif_else_range() -> Result<(), ParseError> { elif b: ... "; - let mut stmts = parse_suite(contents)?; - let stmt = stmts - .pop() - .and_then(ruff_python_ast::Stmt::if_stmt) - .unwrap(); - let range = elif_else_range(&stmt.elif_else_clauses[0], contents).unwrap(); + let parsed = parse_module(contents)?; + let if_stmt = parsed + .suite() + .first() + .expect("module should contain at least one statement") + .as_if_stmt() + .expect("first statement should be an `if` statement"); + let range = elif_else_range(&if_stmt.elif_else_clauses[0], contents).unwrap(); assert_eq!(range.start(), TextSize::from(14)); assert_eq!(range.end(), TextSize::from(18)); @@ -23,12 +25,14 @@ elif b: else: ... 
"; - let mut stmts = parse_suite(contents)?; - let stmt = stmts - .pop() - .and_then(ruff_python_ast::Stmt::if_stmt) - .unwrap(); - let range = elif_else_range(&stmt.elif_else_clauses[0], contents).unwrap(); + let parsed = parse_module(contents)?; + let if_stmt = parsed + .suite() + .first() + .expect("module should contain at least one statement") + .as_if_stmt() + .expect("first statement should be an `if` statement"); + let range = elif_else_range(&if_stmt.elif_else_clauses[0], contents).unwrap(); assert_eq!(range.start(), TextSize::from(14)); assert_eq!(range.end(), TextSize::from(18)); diff --git a/crates/ruff_python_ast_integration_tests/tests/visitor.rs b/crates/ruff_python_ast_integration_tests/tests/visitor.rs index 1c1bf0d0f7fb3..128d0c3f12184 100644 --- a/crates/ruff_python_ast_integration_tests/tests/visitor.rs +++ b/crates/ruff_python_ast_integration_tests/tests/visitor.rs @@ -13,8 +13,7 @@ use ruff_python_ast::{ Expr, FString, FStringElement, Keyword, MatchCase, Operator, Parameter, Parameters, Pattern, Stmt, StringLiteral, TypeParam, UnaryOp, WithItem, }; -use ruff_python_parser::lexer::lex; -use ruff_python_parser::{parse_tokens, Mode}; +use ruff_python_parser::{parse, Mode}; #[test] fn function_arguments() { @@ -157,11 +156,10 @@ fn f_strings() { } fn trace_visitation(source: &str) -> String { - let tokens = lex(source, Mode::Module); - let parsed = parse_tokens(tokens.collect(), source, Mode::Module).unwrap(); + let parsed = parse(source, Mode::Module).unwrap(); let mut visitor = RecordVisitor::default(); - walk_module(&mut visitor, &parsed); + walk_module(&mut visitor, parsed.syntax()); visitor.output } diff --git a/crates/ruff_python_codegen/Cargo.toml b/crates/ruff_python_codegen/Cargo.toml index 7afd304046f25..cf273027bb79d 100644 --- a/crates/ruff_python_codegen/Cargo.toml +++ b/crates/ruff_python_codegen/Cargo.toml @@ -18,6 +18,7 @@ ruff_python_ast = { workspace = true } ruff_python_literal = { workspace = true } ruff_python_parser = { workspace = true } ruff_source_file = { workspace = true } +ruff_text_size = { workspace = true } once_cell = { workspace = true } diff --git a/crates/ruff_python_codegen/src/generator.rs b/crates/ruff_python_codegen/src/generator.rs index 1c95db1f9c3c0..9cb98dd7c6174 100644 --- a/crates/ruff_python_codegen/src/generator.rs +++ b/crates/ruff_python_codegen/src/generator.rs @@ -1416,7 +1416,7 @@ impl<'a> Generator<'a> { #[cfg(test)] mod tests { use ruff_python_ast::{str::Quote, Mod, ModModule}; - use ruff_python_parser::{self, parse_suite, Mode}; + use ruff_python_parser::{self, parse_module, Mode}; use ruff_source_file::LineEnding; use crate::stylist::Indentation; @@ -1427,9 +1427,9 @@ mod tests { let indentation = Indentation::default(); let quote = Quote::default(); let line_ending = LineEnding::default(); - let stmt = parse_suite(contents).unwrap(); + let module = parse_module(contents).unwrap(); let mut generator = Generator::new(&indentation, quote, line_ending); - generator.unparse_suite(&stmt); + generator.unparse_suite(module.suite()); generator.generate() } @@ -1439,9 +1439,9 @@ mod tests { line_ending: LineEnding, contents: &str, ) -> String { - let stmt = parse_suite(contents).unwrap(); + let module = parse_module(contents).unwrap(); let mut generator = Generator::new(indentation, quote, line_ending); - generator.unparse_suite(&stmt); + generator.unparse_suite(module.suite()); generator.generate() } @@ -1449,8 +1449,8 @@ mod tests { let indentation = Indentation::default(); let quote = Quote::default(); let line_ending = 
LineEnding::default(); - let ast = ruff_python_parser::parse(contents, Mode::Ipython).unwrap(); - let Mod::Module(ModModule { body, .. }) = ast else { + let parsed = ruff_python_parser::parse(contents, Mode::Ipython).unwrap(); + let Mod::Module(ModModule { body, .. }) = parsed.into_syntax() else { panic!("Source code didn't return ModModule") }; let [stmt] = body.as_slice() else { diff --git a/crates/ruff_python_codegen/src/lib.rs b/crates/ruff_python_codegen/src/lib.rs index baa71ea1278fb..64a991edcd750 100644 --- a/crates/ruff_python_codegen/src/lib.rs +++ b/crates/ruff_python_codegen/src/lib.rs @@ -2,17 +2,16 @@ mod generator; mod stylist; pub use generator::Generator; -use ruff_python_parser::{lexer, parse_suite, Mode, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_source_file::Locator; pub use stylist::Stylist; /// Run round-trip source code generation on a given Python code. pub fn round_trip(code: &str) -> Result { let locator = Locator::new(code); - let python_ast = parse_suite(code)?; - let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect(); - let stylist = Stylist::from_tokens(&tokens, &locator); + let parsed = parse_module(code)?; + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let mut generator: Generator = (&stylist).into(); - generator.unparse_suite(&python_ast); + generator.unparse_suite(parsed.suite()); Ok(generator.generate()) } diff --git a/crates/ruff_python_codegen/src/stylist.rs b/crates/ruff_python_codegen/src/stylist.rs index fc9e43bfb6926..375f0c8e16e92 100644 --- a/crates/ruff_python_codegen/src/stylist.rs +++ b/crates/ruff_python_codegen/src/stylist.rs @@ -4,10 +4,10 @@ use std::ops::Deref; use once_cell::unsync::OnceCell; -use ruff_python_ast::{str::Quote, StringFlags}; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::{Tok, TokenKind}; +use ruff_python_ast::str::Quote; +use ruff_python_parser::{Token, TokenKind, Tokens}; use ruff_source_file::{find_newline, LineEnding, Locator}; +use ruff_text_size::Ranged; #[derive(Debug, Clone)] pub struct Stylist<'a> { @@ -35,40 +35,42 @@ impl<'a> Stylist<'a> { }) } - pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self { - let indentation = detect_indention(tokens, locator); + pub fn from_tokens(tokens: &Tokens, locator: &'a Locator<'a>) -> Self { + let indentation = detect_indention(tokens.up_to_first_unknown(), locator); Self { locator, indentation, - quote: detect_quote(tokens), + quote: detect_quote(tokens.up_to_first_unknown()), line_ending: OnceCell::default(), } } } -fn detect_quote(tokens: &[LexResult]) -> Quote { - for (token, _) in tokens.iter().flatten() { - match token { - Tok::String { flags, .. 
} if !flags.is_triple_quoted() => return flags.quote_style(), - Tok::FStringStart(flags) => return flags.quote_style(), +fn detect_quote(tokens: &[Token]) -> Quote { + for token in tokens { + match token.kind() { + TokenKind::String if !token.is_triple_quoted_string() => { + return token.string_quote_style() + } + TokenKind::FStringStart => return token.string_quote_style(), _ => continue, } } Quote::default() } -fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation { - let indent_range = tokens.iter().flatten().find_map(|(t, range)| { - if matches!(t, Tok::Indent) { - Some(range) +fn detect_indention(tokens: &[Token], locator: &Locator) -> Indentation { + let indent_range = tokens.iter().find_map(|token| { + if matches!(token.kind(), TokenKind::Indent) { + Some(token.range()) } else { None } }); if let Some(indent_range) = indent_range { - let mut whitespace = locator.slice(*indent_range); + let mut whitespace = locator.slice(indent_range); // https://docs.python.org/3/reference/lexical_analysis.html#indentation // > A formfeed character may be present at the start of the line; it will be ignored for // > the indentation calculations above. Formfeed characters occurring elsewhere in the @@ -96,7 +98,7 @@ fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation { // ) // ``` let mut depth = 0usize; - for (token, range) in tokens.iter().flatten() { + for token in tokens { match token.kind() { TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => { depth = depth.saturating_add(1); @@ -105,7 +107,7 @@ fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation { depth = depth.saturating_sub(1); } TokenKind::NonLogicalNewline => { - let line = locator.line(range.end()); + let line = locator.line(token.end()); let indent_index = line.find(|c: char| !c.is_whitespace()); if let Some(indent_index) = indent_index { if indent_index > 0 { @@ -158,8 +160,7 @@ impl Deref for Indentation { #[cfg(test)] mod tests { - use ruff_python_parser::lexer::lex; - use ruff_python_parser::Mode; + use ruff_python_parser::{parse_module, parse_unchecked, Mode}; use ruff_source_file::{find_newline, LineEnding}; @@ -170,44 +171,36 @@ mod tests { fn indentation() { let contents = r"x = 1"; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation::default() - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation::default()); let contents = r" if True: pass "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation(" ".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); let contents = r" if True: pass "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation(" ".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); let contents = r" if True: pass "; let locator = Locator::new(contents); 
- let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation("\t".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation("\t".to_string())); let contents = r" x = ( @@ -217,11 +210,9 @@ x = ( ) "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation(" ".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); let contents = r" x = ( @@ -231,9 +222,9 @@ x = ( ) "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); + let parsed = parse_unchecked(contents, Mode::Module); assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), + Stylist::from_tokens(parsed.tokens(), &locator).indentation(), &Indentation(" ".to_string()) ); @@ -244,62 +235,48 @@ class FormFeedIndent: print(a) "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).indentation(), - &Indentation(" ".to_string()) - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); } #[test] fn quote() { let contents = r"x = 1"; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::default() - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::default()); let contents = r"x = '1'"; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); let contents = r"x = f'1'"; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); let contents = r#"x = "1""#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); let contents = r#"x = f"1""#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); let contents = r#"s = "It's done.""#; 
let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); // No style if only double quoted docstring (will take default Double) let contents = r#" @@ -308,11 +285,9 @@ def f(): pass "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::default() - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::default()); // Detect from string literal appearing after docstring let contents = r#" @@ -321,11 +296,9 @@ def f(): a = 'v' "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); let contents = r#" '''Module docstring.''' @@ -333,11 +306,9 @@ a = 'v' a = "v" "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); // Detect from f-string appearing after docstring let contents = r#" @@ -346,11 +317,9 @@ a = "v" a = f'v' "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); let contents = r#" '''Module docstring.''' @@ -358,21 +327,17 @@ a = f'v' a = f"v" "#; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Double - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Double); let contents = r" f'''Module docstring.''' "; let locator = Locator::new(contents); - let tokens: Vec<_> = lex(contents, Mode::Module).collect(); - assert_eq!( - Stylist::from_tokens(&tokens, &locator).quote(), - Quote::Single - ); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.quote(), Quote::Single); } #[test] diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index 2c5d8ee50896e..a57e4801300ae 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -20,7 +20,6 @@ ruff_macros = { workspace = true } ruff_python_trivia = { workspace = true } ruff_source_file = { workspace = true } ruff_python_ast = { workspace = true } -ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } ruff_text_size = { workspace = true } diff --git 
a/crates/ruff_python_formatter/src/cli.rs b/crates/ruff_python_formatter/src/cli.rs index 881ca8ffae666..f2f86c7bd1096 100644 --- a/crates/ruff_python_formatter/src/cli.rs +++ b/crates/ruff_python_formatter/src/cli.rs @@ -2,13 +2,12 @@ use std::path::{Path, PathBuf}; -use anyhow::{format_err, Context, Result}; +use anyhow::{Context, Result}; use clap::{command, Parser, ValueEnum}; use ruff_formatter::SourceCode; use ruff_python_ast::PySourceType; -use ruff_python_index::tokens_and_ranges; -use ruff_python_parser::{parse_tokens, AsMode}; +use ruff_python_parser::{parse, AsMode}; use ruff_text_size::Ranged; use crate::comments::collect_comments; @@ -46,12 +45,9 @@ pub struct Cli { pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Result { let source_type = PySourceType::from(source_path); - let (tokens, comment_ranges) = tokens_and_ranges(source, source_type) - .map_err(|err| format_err!("Source contains syntax errors {err:?}"))?; // Parse the AST. - let module = - parse_tokens(tokens, source, source_type.as_mode()).context("Syntax error in input")?; + let parsed = parse(source, source_type.as_mode()).context("Syntax error in input")?; let options = PyFormatOptions::from_extension(source_path) .with_preview(if cli.preview { @@ -66,14 +62,14 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Re }); let source_code = SourceCode::new(source); - let formatted = format_module_ast(&module, &comment_ranges, source, options) - .context("Failed to format node")?; + let formatted = format_module_ast(&parsed, source, options).context("Failed to format node")?; if cli.print_ir { println!("{}", formatted.document().display(source_code)); } if cli.print_comments { // Print preceding, following and enclosing nodes - let decorated_comments = collect_comments(&module, source_code, &comment_ranges); + let decorated_comments = + collect_comments(parsed.syntax(), source_code, parsed.comment_ranges()); if !decorated_comments.is_empty() { println!("# Comment decoration: Range, Preceding, Following, Enclosing, Comment"); } diff --git a/crates/ruff_python_formatter/src/comments/mod.rs b/crates/ruff_python_formatter/src/comments/mod.rs index 9717252a9b435..3731a082e6830 100644 --- a/crates/ruff_python_formatter/src/comments/mod.rs +++ b/crates/ruff_python_formatter/src/comments/mod.rs @@ -481,15 +481,12 @@ mod tests { use ruff_formatter::SourceCode; use ruff_python_ast::{Mod, PySourceType}; - use ruff_python_index::tokens_and_ranges; - use ruff_python_parser::{parse_tokens, AsMode}; - use ruff_python_trivia::CommentRanges; + use ruff_python_parser::{parse, AsMode, Parsed}; use crate::comments::Comments; struct CommentsTestCase<'a> { - module: Mod, - comment_ranges: CommentRanges, + parsed: Parsed, source_code: SourceCode<'a>, } @@ -497,20 +494,21 @@ mod tests { fn from_code(source: &'a str) -> Self { let source_code = SourceCode::new(source); let source_type = PySourceType::Python; - let (tokens, comment_ranges) = - tokens_and_ranges(source, source_type).expect("Expect source to be valid Python"); - let parsed = parse_tokens(tokens, source, source_type.as_mode()) - .expect("Expect source to be valid Python"); + let parsed = + parse(source, source_type.as_mode()).expect("Expect source to be valid Python"); CommentsTestCase { + parsed, source_code, - module: parsed, - comment_ranges, } } fn to_comments(&self) -> Comments { - Comments::from_ast(&self.module, self.source_code, &self.comment_ranges) + Comments::from_ast( + self.parsed.syntax(), + self.source_code, 
+ self.parsed.comment_ranges(), + ) } } diff --git a/crates/ruff_python_formatter/src/context.rs b/crates/ruff_python_formatter/src/context.rs index 3d5f23590adc3..32169ccf7dc92 100644 --- a/crates/ruff_python_formatter/src/context.rs +++ b/crates/ruff_python_formatter/src/context.rs @@ -3,6 +3,7 @@ use crate::other::f_string_element::FStringExpressionElementContext; use crate::PyFormatOptions; use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode}; use ruff_python_ast::str::Quote; +use ruff_python_parser::Tokens; use ruff_source_file::Locator; use std::fmt::{Debug, Formatter}; use std::ops::{Deref, DerefMut}; @@ -12,6 +13,7 @@ pub struct PyFormatContext<'a> { options: PyFormatOptions, contents: &'a str, comments: Comments<'a>, + tokens: &'a Tokens, node_level: NodeLevel, indent_level: IndentLevel, /// Set to a non-None value when the formatter is running on a code @@ -28,11 +30,17 @@ pub struct PyFormatContext<'a> { } impl<'a> PyFormatContext<'a> { - pub(crate) fn new(options: PyFormatOptions, contents: &'a str, comments: Comments<'a>) -> Self { + pub(crate) fn new( + options: PyFormatOptions, + contents: &'a str, + comments: Comments<'a>, + tokens: &'a Tokens, + ) -> Self { Self { options, contents, comments, + tokens, node_level: NodeLevel::TopLevel(TopLevelStatementPosition::Other), indent_level: IndentLevel::new(0), docstring: None, @@ -69,6 +77,10 @@ impl<'a> PyFormatContext<'a> { &self.comments } + pub(crate) fn tokens(&self) -> &'a Tokens { + self.tokens + } + /// Returns a non-None value only if the formatter is running on a code /// snippet within a docstring. /// diff --git a/crates/ruff_python_formatter/src/expression/expr_name.rs b/crates/ruff_python_formatter/src/expression/expr_name.rs index 276ded6dd9a50..5a8b6b2665089 100644 --- a/crates/ruff_python_formatter/src/expression/expr_name.rs +++ b/crates/ruff_python_formatter/src/expression/expr_name.rs @@ -31,15 +31,15 @@ impl NeedsParentheses for ExprName { #[cfg(test)] mod tests { - use ruff_python_parser::parse_program; + use ruff_python_parser::parse_module; use ruff_text_size::{Ranged, TextRange, TextSize}; #[test] fn name_range_with_comments() { - let source = parse_program("a # comment").unwrap(); + let module = parse_module("a # comment").unwrap(); - let expression_statement = source - .body + let expression_statement = module + .suite() .first() .expect("Expected non-empty body") .as_expr_stmt() diff --git a/crates/ruff_python_formatter/src/expression/parentheses.rs b/crates/ruff_python_formatter/src/expression/parentheses.rs index 766bb7071a323..c85355922f14e 100644 --- a/crates/ruff_python_formatter/src/expression/parentheses.rs +++ b/crates/ruff_python_formatter/src/expression/parentheses.rs @@ -444,17 +444,16 @@ impl Format> for FormatEmptyParenthesized<'_> { mod tests { use ruff_python_ast::ExpressionRef; use ruff_python_parser::parse_expression; - use ruff_python_trivia::CommentRanges; use crate::expression::parentheses::is_expression_parenthesized; #[test] fn test_has_parentheses() { let expression = r#"(b().c("")).d()"#; - let expr = parse_expression(expression).unwrap(); + let parsed = parse_expression(expression).unwrap(); assert!(!is_expression_parenthesized( - ExpressionRef::from(&expr), - &CommentRanges::default(), + ExpressionRef::from(parsed.expr()), + parsed.comment_ranges(), expression )); } diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 858714abf8dcb..283727ff76cc8 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ 
b/crates/ruff_python_formatter/src/lib.rs @@ -6,8 +6,7 @@ use ruff_formatter::prelude::*; use ruff_formatter::{format, write, FormatError, Formatted, PrintError, Printed, SourceCode}; use ruff_python_ast::AstNode; use ruff_python_ast::Mod; -use ruff_python_index::tokens_and_ranges; -use ruff_python_parser::{parse_tokens, AsMode, ParseError, ParseErrorType}; +use ruff_python_parser::{parse, AsMode, ParseError, Parsed}; use ruff_python_trivia::CommentRanges; use ruff_source_file::Locator; @@ -114,29 +113,23 @@ pub fn format_module_source( options: PyFormatOptions, ) -> Result { let source_type = options.source_type(); - let (tokens, comment_ranges) = - tokens_and_ranges(source, source_type).map_err(|err| ParseError { - location: err.location(), - error: ParseErrorType::Lexical(err.into_error()), - })?; - let module = parse_tokens(tokens, source, source_type.as_mode())?; - let formatted = format_module_ast(&module, &comment_ranges, source, options)?; + let parsed = parse(source, source_type.as_mode())?; + let formatted = format_module_ast(&parsed, source, options)?; Ok(formatted.print()?) } pub fn format_module_ast<'a>( - module: &'a Mod, - comment_ranges: &'a CommentRanges, + parsed: &'a Parsed, source: &'a str, options: PyFormatOptions, ) -> FormatResult>> { let source_code = SourceCode::new(source); - let comments = Comments::from_ast(module, source_code, comment_ranges); + let comments = Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); let locator = Locator::new(source); let formatted = format!( - PyFormatContext::new(options, locator.contents(), comments), - [module.format()] + PyFormatContext::new(options, locator.contents(), comments, parsed.tokens()), + [parsed.syntax().format()] )?; formatted .context() @@ -161,8 +154,7 @@ mod tests { use insta::assert_snapshot; use ruff_python_ast::PySourceType; - use ruff_python_index::tokens_and_ranges; - use ruff_python_parser::{parse_tokens, AsMode}; + use ruff_python_parser::{parse, AsMode}; use ruff_text_size::{TextRange, TextSize}; use crate::{format_module_ast, format_module_source, format_range, PyFormatOptions}; @@ -203,13 +195,12 @@ def main() -> None: "#; let source_type = PySourceType::Python; - let (tokens, comment_ranges) = tokens_and_ranges(source, source_type).unwrap(); // Parse the AST. let source_path = "code_inline.py"; - let module = parse_tokens(tokens, source, source_type.as_mode()).unwrap(); + let parsed = parse(source, source_type.as_mode()).unwrap(); let options = PyFormatOptions::from_extension(Path::new(source_path)); - let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap(); + let formatted = format_module_ast(&parsed, source, options).unwrap(); // Uncomment the `dbg` to print the IR. 
// Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR diff --git a/crates/ruff_python_formatter/src/range.rs b/crates/ruff_python_formatter/src/range.rs index 58ea00117c8ba..7e5f152ad7883 100644 --- a/crates/ruff_python_formatter/src/range.rs +++ b/crates/ruff_python_formatter/src/range.rs @@ -5,9 +5,8 @@ use ruff_formatter::{ format, FormatContext, FormatError, FormatOptions, IndentStyle, PrintedRange, SourceCode, }; use ruff_python_ast::visitor::preorder::{walk_body, PreorderVisitor, TraversalSignal}; -use ruff_python_ast::{AnyNode, AnyNodeRef, Stmt, StmtMatch, StmtTry}; -use ruff_python_index::tokens_and_ranges; -use ruff_python_parser::{parse_tokens, AsMode, ParseError, ParseErrorType}; +use ruff_python_ast::{AnyNodeRef, Stmt, StmtMatch, StmtTry}; +use ruff_python_parser::{parse, AsMode}; use ruff_python_trivia::{indentation_at_offset, BackwardsTokenizer, SimpleToken, SimpleTokenKind}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -71,32 +70,27 @@ pub fn format_range( return Ok(PrintedRange::new(formatted.into_code(), range)); } - let (tokens, comment_ranges) = - tokens_and_ranges(source, options.source_type()).map_err(|err| ParseError { - location: err.location(), - error: ParseErrorType::Lexical(err.into_error()), - })?; - assert_valid_char_boundaries(range, source); - let module = parse_tokens(tokens, source, options.source_type().as_mode())?; - let root = AnyNode::from(module); + let parsed = parse(source, options.source_type().as_mode())?; let source_code = SourceCode::new(source); - let comments = Comments::from_ast(root.as_ref(), source_code, &comment_ranges); + let comments = Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); let mut context = PyFormatContext::new( options.with_source_map_generation(SourceMapGeneration::Enabled), source, comments, + parsed.tokens(), ); - let (enclosing_node, base_indent) = match find_enclosing_node(range, root.as_ref(), &context) { - EnclosingNode::Node { node, indent_level } => (node, indent_level), - EnclosingNode::Suppressed => { - // The entire range falls into a suppressed range. There's nothing to format. - return Ok(PrintedRange::empty()); - } - }; + let (enclosing_node, base_indent) = + match find_enclosing_node(range, AnyNodeRef::from(parsed.syntax()), &context) { + EnclosingNode::Node { node, indent_level } => (node, indent_level), + EnclosingNode::Suppressed => { + // The entire range falls into a suppressed range. There's nothing to format. 
+ return Ok(PrintedRange::empty()); + } + }; let narrowed_range = narrow_range(range, enclosing_node, &context); assert_valid_char_boundaries(narrowed_range, source); diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 7137558c500b3..2df9bca400b83 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -830,8 +830,7 @@ impl Format> for SuiteChildStatement<'_> { #[cfg(test)] mod tests { use ruff_formatter::format; - use ruff_python_parser::parse_suite; - use ruff_python_trivia::CommentRanges; + use ruff_python_parser::parse_module; use crate::comments::Comments; use crate::prelude::*; @@ -860,17 +859,17 @@ def trailing_func(): pass "; - let statements = parse_suite(source).unwrap(); + let parsed = parse_module(source).unwrap(); - let comment_ranges = CommentRanges::default(); let context = PyFormatContext::new( PyFormatOptions::default(), source, - Comments::from_ranges(&comment_ranges), + Comments::from_ranges(parsed.comment_ranges()), + parsed.tokens(), ); let test_formatter = - format_with(|f: &mut PyFormatter| statements.format().with_options(level).fmt(f)); + format_with(|f: &mut PyFormatter| parsed.suite().format().with_options(level).fmt(f)); let formatted = format!(context, [test_formatter]).unwrap(); let printed = formatted.print().unwrap(); diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs index 6aefad2a12f08..65de2979b0182 100644 --- a/crates/ruff_python_formatter/src/string/docstring.rs +++ b/crates/ruff_python_formatter/src/string/docstring.rs @@ -9,7 +9,6 @@ use itertools::Itertools; use ruff_formatter::printer::SourceMapGeneration; use ruff_python_ast::{str::Quote, StringFlags}; -use ruff_python_parser::ParseError; use {once_cell::sync::Lazy, regex::Regex}; use { ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed}, @@ -1552,16 +1551,14 @@ fn docstring_format_source( use ruff_python_parser::AsMode; let source_type = options.source_type(); - let (tokens, comment_ranges) = - ruff_python_index::tokens_and_ranges(source, source_type).map_err(ParseError::from)?; - let module = ruff_python_parser::parse_tokens(tokens, source, source_type.as_mode())?; + let parsed = ruff_python_parser::parse(source, source_type.as_mode())?; let source_code = ruff_formatter::SourceCode::new(source); - let comments = crate::Comments::from_ast(&module, source_code, &comment_ranges); + let comments = crate::Comments::from_ast(parsed.syntax(), source_code, parsed.comment_ranges()); let locator = Locator::new(source); - let ctx = PyFormatContext::new(options, locator.contents(), comments) + let ctx = PyFormatContext::new(options, locator.contents(), comments, parsed.tokens()) .in_docstring(docstring_quote_style); - let formatted = crate::format!(ctx, [module.format()])?; + let formatted = crate::format!(ctx, [parsed.syntax().format()])?; formatted .context() .comments() diff --git a/crates/ruff_python_formatter/src/verbatim.rs b/crates/ruff_python_formatter/src/verbatim.rs index 94635802ef767..587f2d0690383 100644 --- a/crates/ruff_python_formatter/src/verbatim.rs +++ b/crates/ruff_python_formatter/src/verbatim.rs @@ -1,13 +1,13 @@ use std::borrow::Cow; use std::iter::FusedIterator; +use std::slice::Iter; use unicode_width::UnicodeWidthStr; use ruff_formatter::{write, FormatError}; use ruff_python_ast::AnyNodeRef; use ruff_python_ast::Stmt; -use 
ruff_python_parser::lexer::{lex_starts_at, LexResult}; -use ruff_python_parser::{Mode, Tok}; +use ruff_python_parser::{self as parser, TokenKind}; use ruff_python_trivia::lines_before; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -725,13 +725,10 @@ struct FormatVerbatimStatementRange { impl Format> for FormatVerbatimStatementRange { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - let lexer = lex_starts_at( - &f.context().source()[self.verbatim_range], - Mode::Module, - self.verbatim_range.start(), + let logical_lines = LogicalLinesIter::new( + f.context().tokens().in_range(self.verbatim_range).iter(), + self.verbatim_range, ); - - let logical_lines = LogicalLinesIter::new(lexer, self.verbatim_range); let mut first = true; for logical_line in logical_lines { @@ -784,43 +781,47 @@ impl Format> for FormatVerbatimStatementRange { } } -struct LogicalLinesIter { - lexer: I, +struct LogicalLinesIter<'a> { + tokens: Iter<'a, parser::Token>, // The end of the last logical line last_line_end: TextSize, // The position where the content to lex ends. content_end: TextSize, } -impl LogicalLinesIter { - fn new(lexer: I, verbatim_range: TextRange) -> Self { +impl<'a> LogicalLinesIter<'a> { + fn new(tokens: Iter<'a, parser::Token>, verbatim_range: TextRange) -> Self { Self { - lexer, + tokens, last_line_end: verbatim_range.start(), content_end: verbatim_range.end(), } } } -impl Iterator for LogicalLinesIter -where - I: Iterator, -{ +impl<'a> Iterator for LogicalLinesIter<'a> { type Item = FormatResult; fn next(&mut self) -> Option { let mut parens = 0u32; let (content_end, full_end) = loop { - match self.lexer.next() { - Some(Ok((token, range))) => match token { - Tok::Newline => break (range.start(), range.end()), + match self.tokens.next() { + Some(token) if token.kind() == TokenKind::Unknown => { + return Some(Err(FormatError::syntax_error( + "Unexpected token when lexing verbatim statement range.", + ))) + } + Some(token) => match token.kind() { + TokenKind::Newline => break (token.start(), token.end()), // Ignore if inside an expression - Tok::NonLogicalNewline if parens == 0 => break (range.start(), range.end()), - Tok::Lbrace | Tok::Lpar | Tok::Lsqb => { + TokenKind::NonLogicalNewline if parens == 0 => { + break (token.start(), token.end()) + } + TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => { parens = parens.saturating_add(1); } - Tok::Rbrace | Tok::Rpar | Tok::Rsqb => { + TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => { parens = parens.saturating_sub(1); } _ => {} @@ -839,11 +840,6 @@ where None }; } - Some(Err(_)) => { - return Some(Err(FormatError::syntax_error( - "Unexpected token when lexing verbatim statement range.", - ))) - } } }; @@ -857,7 +853,7 @@ where } } -impl FusedIterator for LogicalLinesIter where I: Iterator {} +impl<'a> FusedIterator for LogicalLinesIter<'a> {} /// A logical line or a comment (or form feed only) line struct LogicalLine { diff --git a/crates/ruff_python_formatter/tests/fixtures.rs b/crates/ruff_python_formatter/tests/fixtures.rs index a72e505e7a538..0c4da466eeaf1 100644 --- a/crates/ruff_python_formatter/tests/fixtures.rs +++ b/crates/ruff_python_formatter/tests/fixtures.rs @@ -391,13 +391,15 @@ fn ensure_unchanged_ast( // Parse the unformatted code. 
let mut unformatted_ast = parse(unformatted_code, source_type.as_mode()) - .expect("Unformatted code to be valid syntax"); + .expect("Unformatted code to be valid syntax") + .into_syntax(); Normalizer.visit_module(&mut unformatted_ast); let unformatted_ast = ComparableMod::from(&unformatted_ast); // Parse the formatted code. - let mut formatted_ast = - parse(formatted_code, source_type.as_mode()).expect("Formatted code to be valid syntax"); + let mut formatted_ast = parse(formatted_code, source_type.as_mode()) + .expect("Formatted code to be valid syntax") + .into_syntax(); Normalizer.visit_module(&mut formatted_ast); let formatted_ast = ComparableMod::from(&formatted_ast); diff --git a/crates/ruff_python_index/src/comment_ranges.rs b/crates/ruff_python_index/src/comment_ranges.rs deleted file mode 100644 index e9ef4c04620bf..0000000000000 --- a/crates/ruff_python_index/src/comment_ranges.rs +++ /dev/null @@ -1,44 +0,0 @@ -use std::fmt::Debug; - -use ruff_python_ast::PySourceType; -use ruff_python_parser::lexer::{lex, LexResult, LexicalError}; -use ruff_python_parser::{allocate_tokens_vec, AsMode, Tok}; -use ruff_python_trivia::CommentRanges; -use ruff_text_size::TextRange; - -#[derive(Debug, Clone, Default)] -pub struct CommentRangesBuilder { - ranges: Vec, -} - -impl CommentRangesBuilder { - pub fn visit_token(&mut self, token: &Tok, range: TextRange) { - if token.is_comment() { - self.ranges.push(range); - } - } - - pub fn finish(self) -> CommentRanges { - CommentRanges::new(self.ranges) - } -} - -/// Helper method to lex and extract comment ranges -pub fn tokens_and_ranges( - source: &str, - source_type: PySourceType, -) -> Result<(Vec, CommentRanges), LexicalError> { - let mut tokens = allocate_tokens_vec(source); - let mut comment_ranges = CommentRangesBuilder::default(); - - for result in lex(source, source_type.as_mode()) { - if let Ok((token, range)) = &result { - comment_ranges.visit_token(token, *range); - } - - tokens.push(result); - } - - let comment_ranges = comment_ranges.finish(); - Ok((tokens, comment_ranges)) -} diff --git a/crates/ruff_python_index/src/fstring_ranges.rs b/crates/ruff_python_index/src/fstring_ranges.rs index b92bbd382c6b6..089050334eebc 100644 --- a/crates/ruff_python_index/src/fstring_ranges.rs +++ b/crates/ruff_python_index/src/fstring_ranges.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; -use ruff_python_parser::Tok; -use ruff_text_size::{TextRange, TextSize}; +use ruff_python_parser::{Token, TokenKind}; +use ruff_text_size::{Ranged, TextRange, TextSize}; /// Stores the ranges of all f-strings in a file sorted by [`TextRange::start`]. /// There can be multiple overlapping ranges for nested f-strings. 
@@ -85,14 +85,14 @@ pub(crate) struct FStringRangesBuilder { } impl FStringRangesBuilder { - pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) { - match token { - Tok::FStringStart(_) => { - self.start_locations.push(range.start()); + pub(crate) fn visit_token(&mut self, token: &Token) { + match token.kind() { + TokenKind::FStringStart => { + self.start_locations.push(token.start()); } - Tok::FStringEnd => { + TokenKind::FStringEnd => { if let Some(start) = self.start_locations.pop() { - self.raw.insert(start, TextRange::new(start, range.end())); + self.raw.insert(start, TextRange::new(start, token.end())); } } _ => {} diff --git a/crates/ruff_python_index/src/indexer.rs b/crates/ruff_python_index/src/indexer.rs index d7f7810de6e83..fb813f9814a22 100644 --- a/crates/ruff_python_index/src/indexer.rs +++ b/crates/ruff_python_index/src/indexer.rs @@ -2,21 +2,15 @@ //! are omitted from the AST (e.g., commented lines). use ruff_python_ast::Stmt; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::Tok; -use ruff_python_trivia::{ - has_leading_content, has_trailing_content, is_python_whitespace, CommentRanges, -}; +use ruff_python_parser::{TokenKind, Tokens}; +use ruff_python_trivia::{has_leading_content, has_trailing_content, is_python_whitespace}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder}; use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder}; -use crate::CommentRangesBuilder; pub struct Indexer { - comment_ranges: CommentRanges, - /// Stores the start offset of continuation lines. continuation_lines: Vec, @@ -28,10 +22,9 @@ pub struct Indexer { } impl Indexer { - pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self { + pub fn from_tokens(tokens: &Tokens, locator: &Locator<'_>) -> Self { assert!(TextSize::try_from(locator.contents().len()).is_ok()); - let mut comment_ranges_builder = CommentRangesBuilder::default(); let mut fstring_ranges_builder = FStringRangesBuilder::default(); let mut multiline_ranges_builder = MultilineRangesBuilder::default(); let mut continuation_lines = Vec::new(); @@ -39,8 +32,8 @@ impl Indexer { let mut prev_end = TextSize::default(); let mut line_start = TextSize::default(); - for (tok, range) in tokens.iter().flatten() { - let trivia = locator.slice(TextRange::new(prev_end, range.start())); + for token in tokens.up_to_first_unknown() { + let trivia = locator.slice(TextRange::new(prev_end, token.start())); // Get the trivia between the previous and the current token and detect any newlines. // This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens @@ -59,38 +52,31 @@ impl Indexer { } } - comment_ranges_builder.visit_token(tok, *range); - fstring_ranges_builder.visit_token(tok, *range); - multiline_ranges_builder.visit_token(tok, *range); + fstring_ranges_builder.visit_token(token); + multiline_ranges_builder.visit_token(token); - match tok { - Tok::Newline | Tok::NonLogicalNewline => { - line_start = range.end(); + match token.kind() { + TokenKind::Newline | TokenKind::NonLogicalNewline => { + line_start = token.end(); } - Tok::String { .. } => { + TokenKind::String => { // If the previous token was a string, find the start of the line that contains // the closing delimiter, since the token itself can span multiple lines. 
- line_start = locator.line_start(range.end()); + line_start = locator.line_start(token.end()); } _ => {} } - prev_end = range.end(); + prev_end = token.end(); } Self { - comment_ranges: comment_ranges_builder.finish(), continuation_lines, fstring_ranges: fstring_ranges_builder.finish(), multiline_ranges: multiline_ranges_builder.finish(), } } - /// Returns the byte offset ranges of comments - pub const fn comment_ranges(&self) -> &CommentRanges { - &self.comment_ranges - } - /// Returns the byte offset ranges of f-strings. pub const fn fstring_ranges(&self) -> &FStringRanges { &self.fstring_ranges @@ -225,19 +211,22 @@ impl Indexer { #[cfg(test)] mod tests { - use ruff_python_parser::lexer::LexResult; - use ruff_python_parser::{lexer, Mode}; + use ruff_python_parser::parse_module; use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; use crate::Indexer; + fn new_indexer(contents: &str) -> Indexer { + let parsed = parse_module(contents).unwrap(); + let locator = Locator::new(contents); + Indexer::from_tokens(parsed.tokens(), &locator) + } + #[test] fn continuation() { let contents = r"x = 1"; - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents)); - assert_eq!(indexer.continuation_line_starts(), &[]); + assert_eq!(new_indexer(contents).continuation_line_starts(), &[]); let contents = r" # Hello, world! @@ -248,9 +237,7 @@ y = 2 " .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents)); - assert_eq!(indexer.continuation_line_starts(), &[]); + assert_eq!(new_indexer(contents).continuation_line_starts(), &[]); let contents = r#" x = \ @@ -268,10 +255,8 @@ if True: ) "# .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer.continuation_line_starts(), + new_indexer(contents).continuation_line_starts(), [ // row 1 TextSize::from(0), @@ -300,10 +285,8 @@ x = 1; \ import os " .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer.continuation_line_starts(), + new_indexer(contents).continuation_line_starts(), [ // row 9 TextSize::from(84), @@ -323,10 +306,8 @@ f'foo { 'str1' \ }' " .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer.continuation_line_starts(), + new_indexer(contents).continuation_line_starts(), [ // row 1 TextSize::new(0), @@ -348,10 +329,8 @@ x = ( + 2) " .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer.continuation_line_starts(), + new_indexer(contents).continuation_line_starts(), [ // row 3 TextSize::new(12), @@ -373,10 +352,8 @@ f"start {f"inner {f"another"}"} end" f"implicit " f"concatenation" "# .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( - indexer + new_indexer(contents) .fstring_ranges() .values() .copied() @@ -409,10 +386,8 @@ f-string"""} """ "# .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); assert_eq!( 
- indexer + new_indexer(contents) .fstring_ranges() .values() .copied() @@ -447,8 +422,7 @@ f-string"""} the end""" "# .trim(); - let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); - let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); + let indexer = new_indexer(contents); // For reference, the ranges of the f-strings in the above code are as // follows where the ones inside parentheses are nested f-strings: diff --git a/crates/ruff_python_index/src/lib.rs b/crates/ruff_python_index/src/lib.rs index 2a4660f0125cf..aabdef1d482ba 100644 --- a/crates/ruff_python_index/src/lib.rs +++ b/crates/ruff_python_index/src/lib.rs @@ -1,7 +1,5 @@ -mod comment_ranges; mod fstring_ranges; mod indexer; mod multiline_ranges; -pub use comment_ranges::{tokens_and_ranges, CommentRangesBuilder}; pub use indexer::Indexer; diff --git a/crates/ruff_python_index/src/multiline_ranges.rs b/crates/ruff_python_index/src/multiline_ranges.rs index 8043929aa9e6b..585ff6f1ae8e9 100644 --- a/crates/ruff_python_index/src/multiline_ranges.rs +++ b/crates/ruff_python_index/src/multiline_ranges.rs @@ -1,6 +1,5 @@ -use ruff_python_ast::StringFlags; -use ruff_python_parser::Tok; -use ruff_text_size::TextRange; +use ruff_python_parser::{Token, TokenKind}; +use ruff_text_size::{Ranged, TextRange}; /// Stores the range of all multiline strings in a file sorted by /// [`TextRange::start`]. @@ -46,10 +45,10 @@ pub(crate) struct MultilineRangesBuilder { } impl MultilineRangesBuilder { - pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) { - if let Tok::String { flags, .. } | Tok::FStringMiddle { flags, .. } = token { - if flags.is_triple_quoted() { - self.ranges.push(range); + pub(crate) fn visit_token(&mut self, token: &Token) { + if matches!(token.kind(), TokenKind::String | TokenKind::FStringMiddle) { + if token.is_triple_quoted_string() { + self.ranges.push(token.range()); } } } diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml index fc064e6f0a84d..00ac193efedf0 100644 --- a/crates/ruff_python_parser/Cargo.toml +++ b/crates/ruff_python_parser/Cargo.toml @@ -14,6 +14,7 @@ license = { workspace = true } [dependencies] ruff_python_ast = { workspace = true } +ruff_python_trivia = { workspace = true } ruff_text_size = { workspace = true } anyhow = { workspace = true } diff --git a/crates/ruff_python_parser/resources/inline/err/async_unexpected_token.py b/crates/ruff_python_parser/resources/inline/err/async_unexpected_token.py index 0641706d52b8f..355a877b6f71d 100644 --- a/crates/ruff_python_parser/resources/inline/err/async_unexpected_token.py +++ b/crates/ruff_python_parser/resources/inline/err/async_unexpected_token.py @@ -2,7 +2,5 @@ async while test: ... async x = 1 async async def foo(): ... -# TODO(dhruvmanila): Here, `match` is actually a Name token because -# of the soft keyword # transformer async match test: case _: ... diff --git a/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py b/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py new file mode 100644 index 0000000000000..414c60908122f --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py @@ -0,0 +1,2 @@ +match yield foo: + case _: ... 
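The inline fixtures above and below pin down the parser's new `match` soft-keyword classification. As a rough illustration of the behavior under test, here is a minimal sketch (not part of the patch; it assumes the `parse_module` entry point and the `Parsed::suite` and `Stmt::as_*` accessors used throughout this series):

```rust
use ruff_python_parser::{parse_module, ParseError};

fn main() -> Result<(), ParseError> {
    // With a `:`-terminated subject, `match` is classified as a keyword and
    // the statement parses as a `match` statement.
    let parsed = parse_module("match foo:\n    case _: ...\n")?;
    assert!(parsed.suite().first().unwrap().as_match_stmt().is_some());

    // In expression position, `match` stays an ordinary identifier, so the
    // line parses as a plain expression statement.
    let parsed = parse_module("match != foo\n")?;
    assert!(parsed.suite().first().unwrap().as_expr_stmt().is_some());

    Ok(())
}
```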
diff --git a/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py b/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py
new file mode 100644
index 0000000000000..cfa7bd3fcc34d
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py
@@ -0,0 +1,2 @@
+match *foo: # Keyword
+    case _: ...
diff --git a/crates/ruff_python_parser/resources/inline/err/match_expected_colon.py b/crates/ruff_python_parser/resources/inline/err/match_expected_colon.py
new file mode 100644
index 0000000000000..1f8fb73b4eb95
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/err/match_expected_colon.py
@@ -0,0 +1,2 @@
+match [1, 2]
+    case _: ...
diff --git a/crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pattern.py b/crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pattern.py
index 14a32f10e105f..4589cb540115a 100644
--- a/crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pattern.py
+++ b/crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pattern.py
@@ -1,3 +1,2 @@
-# TODO(dhruvmanila): Here, `case` is a name token because of soft keyword transformer
 match x:
     case : ...
diff --git a/crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py b/crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py
new file mode 100644
index 0000000000000..07ccf494ece58
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py
@@ -0,0 +1,4 @@
+try: ...
+except Exception as match: ...
+except Exception as case: ...
+except Exception as type: ...
diff --git a/crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py b/crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py
new file mode 100644
index 0000000000000..fb617bd3f43d2
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py
@@ -0,0 +1,4 @@
+from match import pattern
+from type import bar
+from case import pattern
+from match.type.case import foo
diff --git a/crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py b/crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py
new file mode 100644
index 0000000000000..5f68a60cd1b26
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py
@@ -0,0 +1,3 @@
+import foo as match
+import bar as case
+import baz as type
diff --git a/crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py b/crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py
new file mode 100644
index 0000000000000..c434aa5c8171d
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py
@@ -0,0 +1,4 @@
+match foo:
+    case case: ...
+    case match: ...
+    case type: ...
diff --git a/crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py b/crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py
new file mode 100644
index 0000000000000..fa1487f776aad
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py
@@ -0,0 +1,5 @@
+match foo:
+    case match.bar: ...
+    case case.bar: ...
+    case type.bar: ...
+    case match.case.type.bar.type.case.match: ...
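On the API side, the hunks above replace separate `lex(...)` passes with the token stream carried by the parse result. A minimal sketch of that pattern, assuming the `Parsed::tokens`, `Tokens::up_to_first_unknown`, and `Token::kind` APIs as they appear in the `Stylist` and `Indexer` hunks:

```rust
use ruff_python_parser::{parse_module, TokenKind};

fn count_string_tokens(source: &str) -> usize {
    let parsed = parse_module(source).expect("source should be valid Python");
    // The parse result carries the token stream, so callers no longer need a
    // second lexing pass; `up_to_first_unknown()` bounds the scan the same
    // way `Stylist::from_tokens` does above.
    parsed
        .tokens()
        .up_to_first_unknown()
        .iter()
        .filter(|token| token.kind() == TokenKind::String)
        .count()
}
```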
diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py
new file mode 100644
index 0000000000000..bac1f88c78054
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py
@@ -0,0 +1 @@
+match not in case
diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py
new file mode 100644
index 0000000000000..e670a92d87756
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py
@@ -0,0 +1,13 @@
+match
+match != foo
+(foo, match)
+[foo, match]
+{foo, match}
+match;
+match: int
+match,
+match.foo
+match / foo
+match << foo
+match and foo
+match is not foo
diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py
new file mode 100644
index 0000000000000..33835d58256d0
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py
@@ -0,0 +1,24 @@
+match foo:
+    case _: ...
+match 1:
+    case _: ...
+match 1.0:
+    case _: ...
+match 1j:
+    case _: ...
+match "foo":
+    case _: ...
+match f"foo {x}":
+    case _: ...
+match {1, 2}:
+    case _: ...
+match ~foo:
+    case _: ...
+match ...:
+    case _: ...
+match not foo:
+    case _: ...
+match await foo():
+    case _: ...
+match lambda foo: foo:
+    case _: ...
diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py
new file mode 100644
index 0000000000000..89540dd828a27
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py
@@ -0,0 +1,12 @@
+match match:
+    case _: ...
+match case:
+    case _: ...
+match type:
+    case _: ...
+match None:
+    case _: ...
+match True:
+    case _: ...
+match False:
+    case _: ...
diff --git a/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py
new file mode 100644
index 0000000000000..8dd8f7dd7eb82
--- /dev/null
+++ b/crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py
@@ -0,0 +1,10 @@
+match (1, 2) # Identifier
+match (1, 2): # Keyword
+    case _: ...
+match [1:] # Identifier
+match [1, 2]: # Keyword
+    case _: ...
+match * foo # Identifier
+match - foo # Identifier
+match -foo: # Keyword
+    case _: ...
diff --git a/crates/ruff_python_parser/src/error.rs b/crates/ruff_python_parser/src/error.rs
index 08aa223403587..782820e56fdf2 100644
--- a/crates/ruff_python_parser/src/error.rs
+++ b/crates/ruff_python_parser/src/error.rs
@@ -7,7 +7,7 @@ use crate::TokenKind;
 /// Represents represent errors that occur during parsing and are
 /// returned by the `parse_*` functions.
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Clone)]
 pub struct ParseError {
     pub error: ParseErrorType,
     pub location: TextRange,
@@ -85,7 +85,7 @@ impl std::fmt::Display for FStringErrorType {
 }
 
 /// Represents the different types of errors that can occur during parsing.
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Clone)]
 pub enum ParseErrorType {
     /// An unexpected error occurred.
OtherError(String), diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 34d5722047b6a..8933e4cb748a4 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -4,204 +4,192 @@ //! governing what is and is not a valid token are defined in the Python reference //! guide section on [Lexical analysis]. //! -//! The primary function in this module is [`lex`], which takes a string slice -//! and returns an iterator over the tokens in the source code. The tokens are currently returned -//! as a `Result`, where [`Spanned`] is a tuple containing the -//! start and end [`TextSize`] and a [`Tok`] denoting the token. -//! -//! # Example -//! -//! ``` -//! use ruff_python_parser::{lexer::lex, Tok, Mode}; -//! -//! let source = "x = 'RustPython'"; -//! let tokens = lex(source, Mode::Module) -//! .map(|tok| tok.expect("Failed to lex")) -//! .collect::>(); -//! -//! for (token, range) in tokens { -//! println!( -//! "{token:?}@{range:?}", -//! ); -//! } -//! ``` -//! //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html -use std::iter::FusedIterator; use std::{char, cmp::Ordering, str::FromStr}; +use bitflags::bitflags; +use ruff_python_ast::str::Quote; +use ruff_python_ast::str_prefix::{ + AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, +}; use unicode_ident::{is_xid_continue, is_xid_start}; use unicode_normalization::UnicodeNormalization; -use ruff_python_ast::{ - str::Quote, - str_prefix::{AnyStringPrefix, FStringPrefix}, - AnyStringFlags, Int, IpyEscapeKind, StringFlags, -}; -use ruff_text_size::{TextLen, TextRange, TextSize}; +use ruff_python_ast::{AnyStringFlags, Int, IpyEscapeKind, StringFlags}; +use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::error::FStringErrorType; use crate::lexer::cursor::{Cursor, EOF_CHAR}; -use crate::lexer::fstring::{FStringContext, FStrings}; -use crate::lexer::indentation::{Indentation, Indentations}; -use crate::soft_keywords::SoftKeywordTransformer; -use crate::token::Tok; -use crate::Mode; +use crate::lexer::fstring::{FStringContext, FStrings, FStringsCheckpoint}; +use crate::lexer::indentation::{Indentation, Indentations, IndentationsCheckpoint}; +use crate::{Mode, TokenKind}; mod cursor; mod fstring; mod indentation; /// A lexer for Python source code. -pub struct Lexer<'source> { - // Contains the source code to be lexed. - cursor: Cursor<'source>, - source: &'source str, - - state: State, - // Amount of parenthesis. - nesting: u32, - // Indentation levels. - indentations: Indentations, - pending_indentation: Option, - // Lexer mode. - mode: Mode, - // F-string contexts. - fstrings: FStrings, -} +#[derive(Debug)] +pub struct Lexer<'src> { + /// Source code to be lexed. + source: &'src str, -/// Contains a Token along with its `range`. -pub type Spanned = (Tok, TextRange); -/// The result of lexing a token. -pub type LexResult = Result; + /// A pointer to the current character of the source code which is being lexed. + cursor: Cursor<'src>, -/// Create a new lexer from a source string. 
-/// -/// # Examples -/// -/// ``` -/// use ruff_python_parser::{Mode, lexer::lex}; -/// -/// let source = "def hello(): return 'world'"; -/// let lexer = lex(source, Mode::Module); -/// -/// for token in lexer { -/// println!("{:?}", token); -/// } -/// ``` -#[inline] -pub fn lex(source: &str, mode: Mode) -> SoftKeywordTransformer { - SoftKeywordTransformer::new(Lexer::new(source, mode), mode) -} + /// The kind of the current token. + current_kind: TokenKind, -pub struct LexStartsAtIterator { - start_offset: TextSize, - inner: I, -} + /// The range of the current token. + current_range: TextRange, -impl Iterator for LexStartsAtIterator -where - I: Iterator, -{ - type Item = LexResult; + /// The value of the current token. + current_value: TokenValue, - #[inline] - fn next(&mut self) -> Option { - let result = match self.inner.next()? { - Ok((tok, range)) => Ok((tok, range + self.start_offset)), - Err(error) => { - let location = error.location() + self.start_offset; - Err(LexicalError::new(error.into_error(), location)) - } - }; + /// Flags for the current token. + current_flags: TokenFlags, - Some(result) - } + /// Lexer state. + state: State, - fn size_hint(&self) -> (usize, Option) { - self.inner.size_hint() - } -} + /// Represents the current level of nesting in the lexer, indicating the depth of parentheses. + /// The lexer is within a parenthesized context if the value is greater than 0. + nesting: u32, -impl FusedIterator for LexStartsAtIterator where I: Iterator + FusedIterator {} -impl ExactSizeIterator for LexStartsAtIterator where - I: Iterator + ExactSizeIterator -{ -} + /// A stack of indentation representing the current indentation level. + indentations: Indentations, + pending_indentation: Option, -/// Create a new lexer from a source string, starting at a given location. -/// You probably want to use [`lex`] instead. -pub fn lex_starts_at( - source: &str, + /// Lexer mode. mode: Mode, - start_offset: TextSize, -) -> LexStartsAtIterator> { - LexStartsAtIterator { - start_offset, - inner: lex(source, mode), - } + + /// F-string contexts. + fstrings: FStrings, + + /// Errors encountered while lexing. + errors: Vec, } -impl<'source> Lexer<'source> { - /// Create a new lexer from T and a starting location. You probably want to use - /// [`lex`] instead. - pub fn new(input: &'source str, mode: Mode) -> Self { +impl<'src> Lexer<'src> { + /// Create a new lexer for the given input source which starts at the given offset. + /// + /// If the start offset is greater than 0, the cursor is moved ahead that many bytes. + /// This means that the input source should be the complete source code and not the + /// sliced version. + pub(crate) fn new(source: &'src str, mode: Mode, start_offset: TextSize) -> Self { assert!( - u32::try_from(input.len()).is_ok(), + u32::try_from(source.len()).is_ok(), "Lexer only supports files with a size up to 4GB" ); - let mut lxr = Lexer { + let mut lexer = Lexer { + source, + cursor: Cursor::new(source), state: State::AfterNewline, + current_kind: TokenKind::EndOfFile, + current_range: TextRange::empty(start_offset), + current_value: TokenValue::None, + current_flags: TokenFlags::empty(), nesting: 0, indentations: Indentations::default(), pending_indentation: None, - - source: input, - cursor: Cursor::new(input), mode, fstrings: FStrings::default(), + errors: Vec::new(), }; + // TODO: Handle possible mismatch between BOM and explicit encoding declaration. 
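+        // A minimal sketch of how this constructor is driven in practice:
+        // within the crate, callers construct the lexer through the public
+        // `lex` helper added at the bottom of this file and pull tokens one
+        // at a time, mirroring the test helper further below (the sample
+        // source string is illustrative only):
+        //
+        // ```ignore
+        // let mut lexer = lex("x = 1", Mode::Module);
+        // loop {
+        //     let kind = lexer.next_token();
+        //     if kind.is_eof() {
+        //         break;
+        //     }
+        //     println!("{kind:?}@{:?}", lexer.current_range());
+        // }
+        // let errors = lexer.finish();
+        // ```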
// spell-checker:ignore feff - lxr.cursor.eat_char('\u{feff}'); + lexer.cursor.eat_char('\u{feff}'); + + if start_offset > TextSize::new(0) { + lexer.cursor.skip_bytes(start_offset.to_usize()); + } - lxr + lexer } - /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix. - fn lex_identifier(&mut self, first: char) -> Result { - // Detect potential string like rb'' b'' f'' u'' r'' - match (first, self.cursor.first()) { - ('f' | 'F', quote @ ('\'' | '"')) => { - self.cursor.bump(); - return Ok(self.lex_fstring_start(quote, FStringPrefix::Regular)); + /// Returns the kind of the current token. + pub(crate) fn current_kind(&self) -> TokenKind { + self.current_kind + } + + /// Returns the range of the current token. + pub(crate) fn current_range(&self) -> TextRange { + self.current_range + } + + /// Returns the flags for the current token. + pub(crate) fn current_flags(&self) -> TokenFlags { + self.current_flags + } + + /// Helper function to push the given error and return the [`TokenKind::Unknown`] token. + fn push_error(&mut self, error: LexicalError) -> TokenKind { + self.errors.push(error); + TokenKind::Unknown + } + + /// Try lexing the single character string prefix, updating the token flags accordingly. + /// Returns `true` if it matches. + fn try_single_char_prefix(&mut self, first: char) -> bool { + match first { + 'f' | 'F' => self.current_flags |= TokenFlags::F_STRING, + 'u' | 'U' => self.current_flags |= TokenFlags::UNICODE_STRING, + 'b' | 'B' => self.current_flags |= TokenFlags::BYTE_STRING, + 'r' => self.current_flags |= TokenFlags::RAW_STRING_LOWERCASE, + 'R' => self.current_flags |= TokenFlags::RAW_STRING_UPPERCASE, + _ => return false, + } + true + } + + /// Try lexing the double character string prefix, updating the token flags accordingly. + /// Returns `true` if it matches. + fn try_double_char_prefix(&mut self, value: [char; 2]) -> bool { + match value { + ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => { + self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_LOWERCASE; } - ('r', 'f' | 'F') | ('f' | 'F', 'r') if is_quote(self.cursor.second()) => { - self.cursor.bump(); - let quote = self.cursor.bump().unwrap(); - return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: false })); + ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => { + self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_UPPERCASE; } - ('R', 'f' | 'F') | ('f' | 'F', 'R') if is_quote(self.cursor.second()) => { - self.cursor.bump(); - let quote = self.cursor.bump().unwrap(); - return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true })); + ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => { + self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_LOWERCASE; } - (_, quote @ ('\'' | '"')) => { - if let Ok(prefix) = AnyStringPrefix::try_from(first) { - self.cursor.bump(); - return self.lex_string(prefix, quote); - } + ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => { + self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_UPPERCASE; } - (_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => { + _ => return false, + } + true + } + + /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix. 
+ fn lex_identifier(&mut self, first: char) -> TokenKind { + // Detect potential string like rb'' b'' f'' u'' r'' + let quote = match (first, self.cursor.first()) { + (_, quote @ ('\'' | '"')) => self.try_single_char_prefix(first).then(|| { self.cursor.bump(); - if let Ok(prefix) = AnyStringPrefix::try_from([first, second]) { - let quote = self.cursor.bump().unwrap(); - return self.lex_string(prefix, quote); - } + quote + }), + (_, second) if is_quote(self.cursor.second()) => { + self.try_double_char_prefix([first, second]).then(|| { + self.cursor.bump(); + // SAFETY: Safe because of the `is_quote` check in this match arm's guard + self.cursor.bump().unwrap() + }) } - _ => {} + _ => None, + }; + + if let Some(quote) = quote { + if self.current_flags.is_f_string() { + return self.lex_fstring_start(quote); + } + + return self.lex_string(quote); } // Keep track of whether the identifier is ASCII-only or not. @@ -218,62 +206,58 @@ impl<'source> Lexer<'source> { let text = self.token_text(); if !is_ascii { - return Ok(Tok::Name { - name: text.nfkc().collect::().into_boxed_str(), - }); + self.current_value = TokenValue::Name(text.nfkc().collect::().into_boxed_str()); + return TokenKind::Name; } - let keyword = match text { - "False" => Tok::False, - "None" => Tok::None, - "True" => Tok::True, - "and" => Tok::And, - "as" => Tok::As, - "assert" => Tok::Assert, - "async" => Tok::Async, - "await" => Tok::Await, - "break" => Tok::Break, - "case" => Tok::Case, - "class" => Tok::Class, - "continue" => Tok::Continue, - "def" => Tok::Def, - "del" => Tok::Del, - "elif" => Tok::Elif, - "else" => Tok::Else, - "except" => Tok::Except, - "finally" => Tok::Finally, - "for" => Tok::For, - "from" => Tok::From, - "global" => Tok::Global, - "if" => Tok::If, - "import" => Tok::Import, - "in" => Tok::In, - "is" => Tok::Is, - "lambda" => Tok::Lambda, - "match" => Tok::Match, - "nonlocal" => Tok::Nonlocal, - "not" => Tok::Not, - "or" => Tok::Or, - "pass" => Tok::Pass, - "raise" => Tok::Raise, - "return" => Tok::Return, - "try" => Tok::Try, - "type" => Tok::Type, - "while" => Tok::While, - "with" => Tok::With, - "yield" => Tok::Yield, + match text { + "False" => TokenKind::False, + "None" => TokenKind::None, + "True" => TokenKind::True, + "and" => TokenKind::And, + "as" => TokenKind::As, + "assert" => TokenKind::Assert, + "async" => TokenKind::Async, + "await" => TokenKind::Await, + "break" => TokenKind::Break, + "case" => TokenKind::Case, + "class" => TokenKind::Class, + "continue" => TokenKind::Continue, + "def" => TokenKind::Def, + "del" => TokenKind::Del, + "elif" => TokenKind::Elif, + "else" => TokenKind::Else, + "except" => TokenKind::Except, + "finally" => TokenKind::Finally, + "for" => TokenKind::For, + "from" => TokenKind::From, + "global" => TokenKind::Global, + "if" => TokenKind::If, + "import" => TokenKind::Import, + "in" => TokenKind::In, + "is" => TokenKind::Is, + "lambda" => TokenKind::Lambda, + "match" => TokenKind::Match, + "nonlocal" => TokenKind::Nonlocal, + "not" => TokenKind::Not, + "or" => TokenKind::Or, + "pass" => TokenKind::Pass, + "raise" => TokenKind::Raise, + "return" => TokenKind::Return, + "try" => TokenKind::Try, + "type" => TokenKind::Type, + "while" => TokenKind::While, + "with" => TokenKind::With, + "yield" => TokenKind::Yield, _ => { - return Ok(Tok::Name { - name: text.to_string().into_boxed_str(), - }) + self.current_value = TokenValue::Name(text.to_string().into_boxed_str()); + TokenKind::Name } - }; - - Ok(keyword) + } } /// Numeric lexing. The feast can start! 
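+    ///
+    /// A sketch of the dispatch below, using literals from this module's own
+    /// tests: a leading `0` followed by `x`/`X`, `o`/`O`, or `b`/`B` selects
+    /// `lex_number_radix` with the matching radix, while everything else
+    /// falls through to `lex_decimal_number`:
+    ///
+    /// ```text
+    /// 0x2f  0o12  0b1101      -> lex_number_radix (hex, octal, binary)
+    /// 0  123  0.2  1e+2  2j   -> lex_decimal_number
+    /// ```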
- fn lex_number(&mut self, first: char) -> Result { + fn lex_number(&mut self, first: char) -> TokenKind { if first == '0' { if self.cursor.eat_if(|c| matches!(c, 'x' | 'X')).is_some() { self.lex_number_radix(Radix::Hex) @@ -290,7 +274,7 @@ impl<'source> Lexer<'source> { } /// Lex a hex/octal/decimal/binary number without a decimal point. - fn lex_number_radix(&mut self, radix: Radix) -> Result { + fn lex_number_radix(&mut self, radix: Radix) -> TokenKind { #[cfg(debug_assertions)] debug_assert!(matches!( self.cursor.previous().to_ascii_lowercase(), @@ -307,17 +291,18 @@ impl<'source> Lexer<'source> { let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) { Ok(int) => int, Err(err) => { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()), self.token_range(), )); } }; - Ok(Tok::Int { value }) + self.current_value = TokenValue::Int(value); + TokenKind::Int } /// Lex a normal number, that is, no octal, hex or binary number. - fn lex_decimal_number(&mut self, first_digit_or_dot: char) -> Result { + fn lex_decimal_number(&mut self, first_digit_or_dot: char) -> TokenKind { #[cfg(debug_assertions)] debug_assert!(self.cursor.previous().is_ascii_digit() || self.cursor.previous() == '.'); let start_is_zero = first_digit_or_dot == '0'; @@ -332,7 +317,7 @@ impl<'source> Lexer<'source> { number.push('.'); if self.cursor.eat_char('_') { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::OtherError("Invalid Syntax".to_string().into_boxed_str()), TextRange::new(self.offset() - TextSize::new(1), self.offset()), )); @@ -363,35 +348,38 @@ impl<'source> Lexer<'source> { if is_float { // Improvement: Use `Cow` instead of pushing to value text - let value = f64::from_str(number.as_str()).map_err(|_| { - LexicalError::new( + let Ok(value) = f64::from_str(number.as_str()) else { + return self.push_error(LexicalError::new( LexicalErrorType::OtherError( "Invalid decimal literal".to_string().into_boxed_str(), ), self.token_range(), - ) - })?; + )); + }; // Parse trailing 'j': if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() { - Ok(Tok::Complex { + self.current_value = TokenValue::Complex { real: 0.0, imag: value, - }) + }; + TokenKind::Complex } else { - Ok(Tok::Float { value }) + self.current_value = TokenValue::Float(value); + TokenKind::Float } } else { // Parse trailing 'j': if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() { let imag = f64::from_str(number.as_str()).unwrap(); - Ok(Tok::Complex { real: 0.0, imag }) + self.current_value = TokenValue::Complex { real: 0.0, imag }; + TokenKind::Complex } else { let value = match Int::from_str(number.as_str()) { Ok(value) => { if start_is_zero && value.as_u8() != Some(0) { // Leading zeros in decimal integer literals are not permitted. - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::OtherError( "Invalid decimal integer literal" .to_string() @@ -403,13 +391,14 @@ impl<'source> Lexer<'source> { value } Err(err) => { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()), self.token_range(), )) } }; - Ok(Tok::Int { value }) + self.current_value = TokenValue::Int(value); + TokenKind::Int } } } @@ -434,7 +423,7 @@ impl<'source> Lexer<'source> { } /// Lex a single comment. 
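+    ///
+    /// The returned [`TokenKind::Comment`] no longer carries the comment's
+    /// text (the old `Tok::Comment` did); callers that still need it can
+    /// slice the source with the token's range. A sketch:
+    ///
+    /// ```ignore
+    /// let text = &source[token.range()]; // includes the leading `#`
+    /// ```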
- fn lex_comment(&mut self) -> Tok { + fn lex_comment(&mut self) -> TokenKind { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), '#'); @@ -442,11 +431,11 @@ impl<'source> Lexer<'source> { let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len()); self.cursor.skip_bytes(offset); - Tok::Comment(self.token_text().to_string().into_boxed_str()) + TokenKind::Comment } /// Lex a single IPython escape command. - fn lex_ipython_escape_command(&mut self, escape_kind: IpyEscapeKind) -> Tok { + fn lex_ipython_escape_command(&mut self, escape_kind: IpyEscapeKind) -> TokenKind { let mut value = String::new(); loop { @@ -539,16 +528,21 @@ impl<'source> Lexer<'source> { 2 => IpyEscapeKind::Help2, _ => unreachable!("`question_count` is always 1 or 2"), }; - return Tok::IpyEscapeCommand { + + self.current_value = TokenValue::IpyEscapeCommand { kind, value: value.into_boxed_str(), }; + + return TokenKind::IpyEscapeCommand; } '\n' | '\r' | EOF_CHAR => { - return Tok::IpyEscapeCommand { + self.current_value = TokenValue::IpyEscapeCommand { kind: escape_kind, value: value.into_boxed_str(), }; + + return TokenKind::IpyEscapeCommand; } c => { self.cursor.bump(); @@ -559,40 +553,39 @@ impl<'source> Lexer<'source> { } /// Lex a f-string start token. - fn lex_fstring_start(&mut self, quote: char, prefix: FStringPrefix) -> Tok { + fn lex_fstring_start(&mut self, quote: char) -> TokenKind { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), quote); - let mut flags = AnyStringFlags::default() - .with_prefix(AnyStringPrefix::Format(prefix)) - .with_quote_style(if quote == '"' { - Quote::Double - } else { - Quote::Single - }); + if quote == '"' { + self.current_flags |= TokenFlags::DOUBLE_QUOTES; + } if self.cursor.eat_char2(quote, quote) { - flags = flags.with_triple_quotes(); + self.current_flags |= TokenFlags::TRIPLE_QUOTED_STRING; } - self.fstrings.push(FStringContext::new(flags, self.nesting)); - Tok::FStringStart(flags) + self.fstrings + .push(FStringContext::new(self.current_flags, self.nesting)); + + TokenKind::FStringStart } /// Lex a f-string middle or end token. - fn lex_fstring_middle_or_end(&mut self) -> Result, LexicalError> { + fn lex_fstring_middle_or_end(&mut self) -> Option { // SAFETY: Safe because the function is only called when `self.fstrings` is not empty. let fstring = self.fstrings.current().unwrap(); - self.cursor.start_token(); // Check if we're at the end of the f-string. if fstring.is_triple_quoted() { let quote_char = fstring.quote_char(); if self.cursor.eat_char3(quote_char, quote_char, quote_char) { - return Ok(Some(Tok::FStringEnd)); + self.current_flags = fstring.flags(); + return Some(TokenKind::FStringEnd); } } else if self.cursor.eat_char(fstring.quote_char()) { - return Ok(Some(Tok::FStringEnd)); + self.current_flags = fstring.flags(); + return Some(TokenKind::FStringEnd); } // We have to decode `{{` and `}}` into `{` and `}` respectively. 
As an @@ -619,10 +612,11 @@ impl<'source> Lexer<'source> { } else { FStringErrorType::UnterminatedString }; - return Err(LexicalError::new( + self.fstrings.pop(); + return Some(self.push_error(LexicalError::new( LexicalErrorType::FStringError(error), self.token_range(), - )); + ))); } '\n' | '\r' if !fstring.is_triple_quoted() => { // If we encounter a newline while we're in a format spec, then @@ -632,10 +626,11 @@ impl<'source> Lexer<'source> { if in_format_spec { break; } - return Err(LexicalError::new( + self.fstrings.pop(); + return Some(self.push_error(LexicalError::new( LexicalErrorType::FStringError(FStringErrorType::UnterminatedString), self.token_range(), - )); + ))); } '\\' => { self.cursor.bump(); // '\' @@ -698,7 +693,7 @@ impl<'source> Lexer<'source> { } let range = self.token_range(); if range.is_empty() { - return Ok(None); + return None; } let value = if normalized.is_empty() { @@ -707,42 +702,39 @@ impl<'source> Lexer<'source> { normalized.push_str(&self.source[TextRange::new(last_offset, self.offset())]); normalized }; - Ok(Some(Tok::FStringMiddle { - value: value.into_boxed_str(), - flags: fstring.flags(), - })) + + self.current_value = TokenValue::FStringMiddle(value.into_boxed_str()); + self.current_flags = fstring.flags(); + + Some(TokenKind::FStringMiddle) } /// Lex a string literal. - fn lex_string(&mut self, prefix: AnyStringPrefix, quote: char) -> Result { + fn lex_string(&mut self, quote: char) -> TokenKind { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), quote); - let mut flags = AnyStringFlags::default() - .with_prefix(prefix) - .with_quote_style(if quote == '"' { - Quote::Double - } else { - Quote::Single - }); + if quote == '"' { + self.current_flags |= TokenFlags::DOUBLE_QUOTES; + } // If the next two characters are also the quote character, then we have a triple-quoted // string; consume those two characters and ensure that we require a triple-quote to close if self.cursor.eat_char2(quote, quote) { - flags = flags.with_triple_quotes(); + self.current_flags |= TokenFlags::TRIPLE_QUOTED_STRING; } let value_start = self.offset(); let quote_byte = u8::try_from(quote).expect("char that fits in u8"); - let value_end = if flags.is_triple_quoted() { + let value_end = if self.current_flags.is_triple_quoted() { // For triple-quoted strings, scan until we find the closing quote (ignoring escaped // quotes) or the end of the file. loop { let Some(index) = memchr::memchr(quote_byte, self.cursor.rest().as_bytes()) else { self.cursor.skip_to_end(); - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::UnclosedStringError, self.token_range(), )); @@ -778,7 +770,7 @@ impl<'source> Lexer<'source> { else { self.cursor.skip_to_end(); - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::StringError, self.token_range(), )); @@ -806,7 +798,7 @@ impl<'source> Lexer<'source> { match ch { Some('\r' | '\n') => { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::UnclosedStringError, self.token_range(), )); @@ -819,34 +811,33 @@ impl<'source> Lexer<'source> { } }; - Ok(Tok::String { - value: self.source[TextRange::new(value_start, value_end)] + self.current_value = TokenValue::String( + self.source[TextRange::new(value_start, value_end)] .to_string() .into_boxed_str(), - flags, - }) + ); + + TokenKind::String } - // This is the main entry point. Call this function to retrieve the next token. 
- // This function is used by the iterator implementation. - pub fn next_token(&mut self) -> LexResult { + /// Lex the next token. + pub fn next_token(&mut self) -> TokenKind { + self.cursor.start_token(); + self.current_value = TokenValue::None; + self.current_flags = TokenFlags::empty(); + self.current_kind = self.lex_token(); + self.current_range = self.token_range(); + self.current_kind + } + + fn lex_token(&mut self) -> TokenKind { if let Some(fstring) = self.fstrings.current() { if !fstring.is_in_expression(self.nesting) { - match self.lex_fstring_middle_or_end() { - Ok(Some(tok)) => { - if tok.is_f_string_end() { - self.fstrings.pop(); - } - return Ok((tok, self.token_range())); - } - Err(e) => { - // This is to prevent an infinite loop in which the lexer - // continuously returns an error token because the f-string - // remains on the stack. + if let Some(token) = self.lex_fstring_middle_or_end() { + if matches!(token, TokenKind::FStringEnd) { self.fstrings.pop(); - return Err(e); } - _ => {} + return token; } } } @@ -855,15 +846,17 @@ impl<'source> Lexer<'source> { match self.indentations.current().try_compare(indentation) { Ok(Ordering::Greater) => { self.pending_indentation = Some(indentation); - let offset = self.offset(); - self.indentations.dedent_one(indentation).map_err(|_| { - LexicalError::new(LexicalErrorType::IndentationError, self.token_range()) - })?; - return Ok((Tok::Dedent, TextRange::empty(offset))); + if self.indentations.dedent_one(indentation).is_err() { + return self.push_error(LexicalError::new( + LexicalErrorType::IndentationError, + self.token_range(), + )); + } + return TokenKind::Dedent; } Ok(_) => {} Err(_) => { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::IndentationError, self.token_range(), )); @@ -872,24 +865,28 @@ impl<'source> Lexer<'source> { } if self.state.is_after_newline() { - if let Some(indentation) = self.eat_indentation()? 
{ - return Ok(indentation); + if let Some(indentation) = self.eat_indentation() { + return indentation; } } else { - self.skip_whitespace()?; + if let Err(error) = self.skip_whitespace() { + return self.push_error(error); + } } + // The lexer might've skipped whitespaces, so update the start offset self.cursor.start_token(); + if let Some(c) = self.cursor.bump() { if c.is_ascii() { self.consume_ascii_character(c) } else if is_unicode_identifier_start(c) { - let identifier = self.lex_identifier(c)?; + let identifier = self.lex_identifier(c); self.state = State::Other; - Ok((identifier, self.token_range())) + identifier } else { - Err(LexicalError::new( + self.push_error(LexicalError::new( LexicalErrorType::UnrecognizedToken { tok: c }, self.token_range(), )) @@ -934,9 +931,8 @@ impl<'source> Lexer<'source> { Ok(()) } - fn eat_indentation(&mut self) -> Result, LexicalError> { + fn eat_indentation(&mut self) -> Option { let mut indentation = Indentation::root(); - self.cursor.start_token(); loop { match self.cursor.first() { @@ -953,12 +949,15 @@ impl<'source> Lexer<'source> { if self.cursor.eat_char('\r') { self.cursor.eat_char('\n'); } else if self.cursor.is_eof() { - return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range())); + return Some(self.push_error(LexicalError::new( + LexicalErrorType::Eof, + self.token_range(), + ))); } else if !self.cursor.eat_char('\n') { - return Err(LexicalError::new( + return Some(self.push_error(LexicalError::new( LexicalErrorType::LineContinuationError, self.token_range(), - )); + ))); } indentation = Indentation::root(); } @@ -975,30 +974,42 @@ impl<'source> Lexer<'source> { if !matches!(self.cursor.first(), '\n' | '\r' | '#' | EOF_CHAR) { self.state = State::NonEmptyLogicalLine; - if let Some(spanned) = self.handle_indentation(indentation)? { - // Set to false so that we don't handle indentation on the next call. - - return Ok(Some(spanned)); - } + // Set to false so that we don't handle indentation on the next call. + return self.handle_indentation(indentation); } - Ok(None) + None } - fn handle_indentation( - &mut self, - indentation: Indentation, - ) -> Result, LexicalError> { + fn handle_indentation(&mut self, indentation: Indentation) -> Option { let token = match self.indentations.current().try_compare(indentation) { // Dedent Ok(Ordering::Greater) => { self.pending_indentation = Some(indentation); - self.indentations.dedent_one(indentation).map_err(|_| { - LexicalError::new(LexicalErrorType::IndentationError, self.token_range()) - })?; + if self.indentations.dedent_one(indentation).is_err() { + return Some(self.push_error(LexicalError::new( + LexicalErrorType::IndentationError, + self.token_range(), + ))); + }; - Some((Tok::Dedent, TextRange::empty(self.offset()))) + // The lexer might've eaten some whitespaces to calculate the `indentation`. For + // example: + // + // ```py + // if first: + // if second: + // pass + // foo + // # ^ + // ``` + // + // Here, the cursor is at `^` and the `indentation` contains the whitespaces before + // the `pass` token. 
+ self.cursor.start_token(); + + Some(TokenKind::Dedent) } Ok(Ordering::Equal) => None, @@ -1006,74 +1017,74 @@ impl<'source> Lexer<'source> { // Indent Ok(Ordering::Less) => { self.indentations.indent(indentation); - Some((Tok::Indent, self.token_range())) + Some(TokenKind::Indent) } Err(_) => { - return Err(LexicalError::new( + return Some(self.push_error(LexicalError::new( LexicalErrorType::IndentationError, self.token_range(), - )); + ))); } }; - Ok(token) + token } - fn consume_end(&mut self) -> Result { + fn consume_end(&mut self) -> TokenKind { // We reached end of file. // First of all, we need all nestings to be finished. if self.nesting > 0 { // Reset the nesting to avoid going into infinite loop. self.nesting = 0; - return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range())); + return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range())); } // Next, insert a trailing newline, if required. if !self.state.is_new_logical_line() { self.state = State::AfterNewline; - Ok((Tok::Newline, TextRange::empty(self.offset()))) + TokenKind::Newline } // Next, flush the indentation stack to zero. else if self.indentations.dedent().is_some() { - Ok((Tok::Dedent, TextRange::empty(self.offset()))) + TokenKind::Dedent } else { - Ok((Tok::EndOfFile, TextRange::empty(self.offset()))) + TokenKind::EndOfFile } } // Dispatch based on the given character. - fn consume_ascii_character(&mut self, c: char) -> Result { + fn consume_ascii_character(&mut self, c: char) -> TokenKind { let token = match c { - c if is_ascii_identifier_start(c) => self.lex_identifier(c)?, - '0'..='9' => self.lex_number(c)?, - '#' => return Ok((self.lex_comment(), self.token_range())), - '\'' | '"' => self.lex_string(AnyStringPrefix::default(), c)?, + c if is_ascii_identifier_start(c) => self.lex_identifier(c), + '0'..='9' => self.lex_number(c), + '#' => return self.lex_comment(), + '\'' | '"' => self.lex_string(c), '=' => { if self.cursor.eat_char('=') { - Tok::EqEqual + TokenKind::EqEqual } else { self.state = State::AfterEqual; - return Ok((Tok::Equal, self.token_range())); + return TokenKind::Equal; } } '+' => { if self.cursor.eat_char('=') { - Tok::PlusEqual + TokenKind::PlusEqual } else { - Tok::Plus + TokenKind::Plus } } '*' => { if self.cursor.eat_char('=') { - Tok::StarEqual + TokenKind::StarEqual } else if self.cursor.eat_char('*') { if self.cursor.eat_char('=') { - Tok::DoubleStarEqual + TokenKind::DoubleStarEqual } else { - Tok::DoubleStar + TokenKind::DoubleStar } } else { - Tok::Star + TokenKind::Star } } @@ -1100,97 +1111,97 @@ impl<'source> Lexer<'source> { self.lex_ipython_escape_command(kind) } - '?' if self.mode == Mode::Ipython => Tok::Question, + '?' 
if self.mode == Mode::Ipython => TokenKind::Question, '/' => { if self.cursor.eat_char('=') { - Tok::SlashEqual + TokenKind::SlashEqual } else if self.cursor.eat_char('/') { if self.cursor.eat_char('=') { - Tok::DoubleSlashEqual + TokenKind::DoubleSlashEqual } else { - Tok::DoubleSlash + TokenKind::DoubleSlash } } else { - Tok::Slash + TokenKind::Slash } } '%' => { if self.cursor.eat_char('=') { - Tok::PercentEqual + TokenKind::PercentEqual } else { - Tok::Percent + TokenKind::Percent } } '|' => { if self.cursor.eat_char('=') { - Tok::VbarEqual + TokenKind::VbarEqual } else { - Tok::Vbar + TokenKind::Vbar } } '^' => { if self.cursor.eat_char('=') { - Tok::CircumflexEqual + TokenKind::CircumflexEqual } else { - Tok::CircumFlex + TokenKind::CircumFlex } } '&' => { if self.cursor.eat_char('=') { - Tok::AmperEqual + TokenKind::AmperEqual } else { - Tok::Amper + TokenKind::Amper } } '-' => { if self.cursor.eat_char('=') { - Tok::MinusEqual + TokenKind::MinusEqual } else if self.cursor.eat_char('>') { - Tok::Rarrow + TokenKind::Rarrow } else { - Tok::Minus + TokenKind::Minus } } '@' => { if self.cursor.eat_char('=') { - Tok::AtEqual + TokenKind::AtEqual } else { - Tok::At + TokenKind::At } } '!' => { if self.cursor.eat_char('=') { - Tok::NotEqual + TokenKind::NotEqual } else { - Tok::Exclamation + TokenKind::Exclamation } } - '~' => Tok::Tilde, + '~' => TokenKind::Tilde, '(' => { self.nesting += 1; - Tok::Lpar + TokenKind::Lpar } ')' => { self.nesting = self.nesting.saturating_sub(1); - Tok::Rpar + TokenKind::Rpar } '[' => { self.nesting += 1; - Tok::Lsqb + TokenKind::Lsqb } ']' => { self.nesting = self.nesting.saturating_sub(1); - Tok::Rsqb + TokenKind::Rsqb } '{' => { self.nesting += 1; - Tok::Lbrace + TokenKind::Lbrace } '}' => { if let Some(fstring) = self.fstrings.current_mut() { if fstring.nesting() == self.nesting { - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::FStringError(FStringErrorType::SingleRbrace), self.token_range(), )); @@ -1198,7 +1209,7 @@ impl<'source> Lexer<'source> { fstring.try_end_format_spec(self.nesting); } self.nesting = self.nesting.saturating_sub(1); - Tok::Rbrace + TokenKind::Rbrace } ':' => { if self @@ -1206,85 +1217,79 @@ impl<'source> Lexer<'source> { .current_mut() .is_some_and(|fstring| fstring.try_start_format_spec(self.nesting)) { - Tok::Colon + TokenKind::Colon } else if self.cursor.eat_char('=') { - Tok::ColonEqual + TokenKind::ColonEqual } else { - Tok::Colon + TokenKind::Colon } } - ';' => Tok::Semi, + ';' => TokenKind::Semi, '<' => { if self.cursor.eat_char('<') { if self.cursor.eat_char('=') { - Tok::LeftShiftEqual + TokenKind::LeftShiftEqual } else { - Tok::LeftShift + TokenKind::LeftShift } } else if self.cursor.eat_char('=') { - Tok::LessEqual + TokenKind::LessEqual } else { - Tok::Less + TokenKind::Less } } '>' => { if self.cursor.eat_char('>') { if self.cursor.eat_char('=') { - Tok::RightShiftEqual + TokenKind::RightShiftEqual } else { - Tok::RightShift + TokenKind::RightShift } } else if self.cursor.eat_char('=') { - Tok::GreaterEqual + TokenKind::GreaterEqual } else { - Tok::Greater + TokenKind::Greater } } - ',' => Tok::Comma, + ',' => TokenKind::Comma, '.' => { if self.cursor.first().is_ascii_digit() { - self.lex_decimal_number('.')? 
+ self.lex_decimal_number('.') } else if self.cursor.eat_char2('.', '.') { - Tok::Ellipsis + TokenKind::Ellipsis } else { - Tok::Dot + TokenKind::Dot } } '\n' => { - return Ok(( - if self.nesting == 0 && !self.state.is_new_logical_line() { - self.state = State::AfterNewline; - Tok::Newline - } else { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.try_end_format_spec(self.nesting); - } - Tok::NonLogicalNewline - }, - self.token_range(), - )) + return if self.nesting == 0 && !self.state.is_new_logical_line() { + self.state = State::AfterNewline; + TokenKind::Newline + } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } + TokenKind::NonLogicalNewline + } } '\r' => { self.cursor.eat_char('\n'); - return Ok(( - if self.nesting == 0 && !self.state.is_new_logical_line() { - self.state = State::AfterNewline; - Tok::Newline - } else { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.try_end_format_spec(self.nesting); - } - Tok::NonLogicalNewline - }, - self.token_range(), - )); + return if self.nesting == 0 && !self.state.is_new_logical_line() { + self.state = State::AfterNewline; + TokenKind::Newline + } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } + TokenKind::NonLogicalNewline + }; } _ => { self.state = State::Other; - return Err(LexicalError::new( + return self.push_error(LexicalError::new( LexicalErrorType::UnrecognizedToken { tok: c }, self.token_range(), )); @@ -1293,7 +1298,7 @@ impl<'source> Lexer<'source> { self.state = State::Other; - Ok((token, self.token_range())) + token } #[inline] @@ -1305,11 +1310,12 @@ impl<'source> Lexer<'source> { } #[inline] - fn token_text(&self) -> &'source str { + fn token_text(&self) -> &'src str { &self.source[self.token_range()] } - // Lexer doesn't allow files larger than 4GB + /// Retrieves the current offset of the cursor within the source code. + // SAFETY: Lexer doesn't allow files larger than 4GB #[allow(clippy::cast_possible_truncation)] #[inline] fn offset(&self) -> TextSize { @@ -1320,25 +1326,220 @@ impl<'source> Lexer<'source> { fn token_start(&self) -> TextSize { self.token_range().start() } + + /// Takes the token value corresponding to the current token out of the lexer, replacing it + /// with the default value. + /// + /// All the subsequent call to this method without moving the lexer would always return the + /// default value which is [`TokenValue::None`]. + pub(crate) fn take_value(&mut self) -> TokenValue { + std::mem::take(&mut self.current_value) + } + + /// Creates a checkpoint to which the lexer can later return to using [`Self::rewind`]. + pub(crate) fn checkpoint(&self) -> LexerCheckpoint<'src> { + LexerCheckpoint { + value: self.current_value.clone(), + current_kind: self.current_kind, + current_range: self.current_range, + current_flags: self.current_flags, + cursor: self.cursor.clone(), + state: self.state, + nesting: self.nesting, + indentations_checkpoint: self.indentations.checkpoint(), + pending_indentation: self.pending_indentation, + fstrings_checkpoint: self.fstrings.checkpoint(), + errors_position: self.errors.len(), + } + } + + /// Restore the lexer to the given checkpoint. 
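+    ///
+    /// Together with [`Lexer::checkpoint`], this enables speculative lexing:
+    /// the parser can record a checkpoint, lex ahead (for example, to decide
+    /// whether a soft keyword like `match` starts a statement, as exercised
+    /// by the `match_classify_*` test files in this change), and rewind if
+    /// the speculation fails. Errors reported after the checkpoint are
+    /// discarded by truncating to `errors_position`. A sketch:
+    ///
+    /// ```ignore
+    /// let checkpoint = lexer.checkpoint();
+    /// let kind = lexer.next_token();
+    /// if !matches!(kind, TokenKind::Colon) {
+    ///     lexer.rewind(checkpoint); // restore the pre-lookahead state
+    /// }
+    /// ```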
+ pub(crate) fn rewind(&mut self, checkpoint: LexerCheckpoint<'src>) { + let LexerCheckpoint { + value, + current_kind, + current_range, + current_flags, + cursor, + state, + nesting, + indentations_checkpoint, + pending_indentation, + fstrings_checkpoint, + errors_position, + } = checkpoint; + + self.current_value = value; + self.current_kind = current_kind; + self.current_range = current_range; + self.current_flags = current_flags; + self.cursor = cursor; + self.state = state; + self.nesting = nesting; + self.indentations.rewind(indentations_checkpoint); + self.pending_indentation = pending_indentation; + self.fstrings.rewind(fstrings_checkpoint); + self.errors.truncate(errors_position); + } + + pub fn finish(self) -> Vec { + self.errors + } } -// Implement iterator pattern for Lexer. -// Calling the next element in the iterator will yield the next lexical -// token. -impl Iterator for Lexer<'_> { - type Item = LexResult; +bitflags! { + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub(crate) struct TokenFlags: u8 { + /// The token is a string with double quotes (`"`). + const DOUBLE_QUOTES = 1 << 0; + /// The token is a triple-quoted string i.e., it starts and ends with three consecutive + /// quote characters (`"""` or `'''`). + const TRIPLE_QUOTED_STRING = 1 << 1; + + /// The token is a unicode string i.e., prefixed with `u` or `U` + const UNICODE_STRING = 1 << 2; + /// The token is a byte string i.e., prefixed with `b` or `B` + const BYTE_STRING = 1 << 3; + /// The token is an f-string i.e., prefixed with `f` or `F` + const F_STRING = 1 << 4; + /// The token is a raw string and the prefix character is in lowercase. + const RAW_STRING_LOWERCASE = 1 << 5; + /// The token is a raw string and the prefix character is in uppercase. + const RAW_STRING_UPPERCASE = 1 << 6; + + /// The token is a raw string i.e., prefixed with `r` or `R` + const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits(); + } +} - fn next(&mut self) -> Option { - let token = self.next_token(); +impl StringFlags for TokenFlags { + fn quote_style(self) -> Quote { + if self.intersects(TokenFlags::DOUBLE_QUOTES) { + Quote::Double + } else { + Quote::Single + } + } + + fn is_triple_quoted(self) -> bool { + self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) + } - match token { - Ok((Tok::EndOfFile, _)) => None, - r => Some(r), + fn prefix(self) -> AnyStringPrefix { + if self.intersects(TokenFlags::F_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true }) + } else { + AnyStringPrefix::Format(FStringPrefix::Regular) + } + } else if self.intersects(TokenFlags::BYTE_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) + } else { + AnyStringPrefix::Bytes(ByteStringPrefix::Regular) + } + } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true }) + } else if self.intersects(TokenFlags::UNICODE_STRING) { + AnyStringPrefix::Regular(StringLiteralPrefix::Unicode) + } else 
{ + AnyStringPrefix::Regular(StringLiteralPrefix::Empty) } } } -impl FusedIterator for Lexer<'_> {} +impl TokenFlags { + /// Returns `true` if the token is an f-string. + const fn is_f_string(self) -> bool { + self.intersects(TokenFlags::F_STRING) + } + + /// Returns `true` if the token is a raw string. + const fn is_raw_string(self) -> bool { + self.intersects(TokenFlags::RAW_STRING) + } + + pub(crate) fn as_any_string_flags(self) -> AnyStringFlags { + AnyStringFlags::new(self.prefix(), self.quote_style(), self.is_triple_quoted()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Token { + /// The kind of the token. + kind: TokenKind, + /// The range of the token. + range: TextRange, + /// The set of flags describing this token. + flags: TokenFlags, +} + +impl Token { + pub(crate) fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token { + Self { kind, range, flags } + } + + /// Returns the token kind. + #[inline] + pub const fn kind(&self) -> TokenKind { + self.kind + } + + /// Returns the token as a tuple of (kind, range). + #[inline] + pub const fn as_tuple(&self) -> (TokenKind, TextRange) { + (self.kind, self.range) + } + + /// Returns `true` if this is a trivia token. + #[inline] + pub const fn is_trivia(self) -> bool { + matches!(self.kind, TokenKind::Comment | TokenKind::NonLogicalNewline) + } + + /// Returns `true` if this is any kind of string token. + const fn is_any_string(self) -> bool { + matches!( + self.kind, + TokenKind::String + | TokenKind::FStringStart + | TokenKind::FStringMiddle + | TokenKind::FStringEnd + ) + } + + /// Returns `true` if the current token is a triple-quoted string of any kind. + /// + /// # Panics + /// + /// If it isn't a string or any f-string tokens. + pub fn is_triple_quoted_string(self) -> bool { + assert!(self.is_any_string()); + self.flags.is_triple_quoted() + } + + /// Returns the [`Quote`] style for the current string token of any kind. + /// + /// # Panics + /// + /// If it isn't a string or any f-string tokens. + pub fn string_quote_style(self) -> Quote { + assert!(self.is_any_string()); + self.flags.quote_style() + } +} + +impl Ranged for Token { + fn range(&self) -> TextRange { + self.range + } +} /// Represents an error that occur during lexing and are /// returned by the `parse_*` functions in the iterator in the @@ -1463,6 +1664,55 @@ impl std::fmt::Display for LexicalErrorType { } } +#[derive(Clone, Debug, Default)] +pub(crate) enum TokenValue { + #[default] + None, + /// Token value for a name, commonly known as an identifier. + /// + /// Unicode names are NFKC-normalized by the lexer, + /// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers) + Name(Box), + /// Token value for an integer. + Int(Int), + /// Token value for a floating point number. + Float(f64), + /// Token value for a complex number. + Complex { + /// The real part of the complex number. + real: f64, + /// The imaginary part of the complex number. + imag: f64, + }, + /// Token value for a string. + String(Box), + /// Token value that includes the portion of text inside the f-string that's not + /// part of the expression part and isn't an opening or closing brace. + FStringMiddle(Box), + /// Token value for IPython escape commands. These are recognized by the lexer + /// only when the mode is [`Mode::Ipython`]. + IpyEscapeCommand { + /// The magic command value. + value: Box, + /// The kind of magic command. 
+ kind: IpyEscapeKind, + }, +} + +pub(crate) struct LexerCheckpoint<'src> { + value: TokenValue, + current_kind: TokenKind, + current_range: TextRange, + current_flags: TokenFlags, + cursor: Cursor<'src>, + state: State, + nesting: u32, + indentations_checkpoint: IndentationsCheckpoint, + pending_indentation: Option, + fstrings_checkpoint: FStringsCheckpoint, + errors_position: usize, +} + #[derive(Copy, Clone, Debug)] enum State { /// Lexer is right at the beginning of the file or after a `Newline` token. @@ -1608,9 +1858,16 @@ impl<'a> LexedText<'a> { } } +/// Create a new [`Lexer`] for the given source code and [`Mode`]. +pub fn lex(source: &str, mode: Mode) -> Lexer { + Lexer::new(source, mode, TextSize::default()) +} + #[cfg(test)] mod tests { - use insta::assert_debug_snapshot; + use std::fmt::Write; + + use insta::assert_snapshot; use super::*; @@ -1618,61 +1875,145 @@ mod tests { const MAC_EOL: &str = "\r"; const UNIX_EOL: &str = "\n"; - fn lex_source_with_mode(source: &str, mode: Mode) -> Vec { - let lexer = lex(source, mode); - lexer.map(std::result::Result::unwrap).collect() + /// Same as [`Token`] except that this includes the [`TokenValue`] as well. + struct TestToken { + kind: TokenKind, + value: TokenValue, + range: TextRange, + flags: TokenFlags, + } + + impl std::fmt::Debug for TestToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut tuple = f.debug_tuple(""); + let mut tuple = if matches!(self.value, TokenValue::None) { + tuple.field(&self.kind) + } else { + tuple.field(&self.value) + }; + tuple = tuple.field(&self.range); + if self.flags.is_empty() { + tuple.finish() + } else { + tuple.field(&self.flags).finish() + } + } } - fn lex_source(source: &str) -> Vec { - lex_source_with_mode(source, Mode::Module) + struct LexerOutput { + tokens: Vec, + errors: Vec, } - fn lex_jupyter_source(source: &str) -> Vec { - lex_source_with_mode(source, Mode::Ipython) + impl std::fmt::Display for LexerOutput { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "## Tokens")?; + writeln!(f, "```\n{:#?}\n```", self.tokens)?; + if !self.errors.is_empty() { + writeln!(f, "## Errors")?; + writeln!(f, "```\n{:#?}\n```", self.errors)?; + } + Ok(()) + } } - fn ipython_escape_command_line_continuation_eol(eol: &str) -> Vec { + fn lex(source: &str, mode: Mode) -> LexerOutput { + let mut lexer = Lexer::new(source, mode, TextSize::default()); + let mut tokens = Vec::new(); + loop { + let kind = lexer.next_token(); + if kind.is_eof() { + break; + } + tokens.push(TestToken { + kind, + value: lexer.take_value(), + range: lexer.current_range(), + flags: lexer.current_flags(), + }); + } + LexerOutput { + tokens, + errors: lexer.finish(), + } + } + + fn lex_valid(source: &str, mode: Mode) -> LexerOutput { + let output = lex(source, mode); + + if !output.errors.is_empty() { + let mut message = "Unexpected lexical errors for a valid source:\n".to_string(); + for error in &output.errors { + writeln!(&mut message, "{error:?}").unwrap(); + } + writeln!(&mut message, "Source:\n{source}").unwrap(); + panic!("{message}"); + } + + output + } + + fn lex_invalid(source: &str, mode: Mode) -> LexerOutput { + let output = lex(source, mode); + + assert!( + !output.errors.is_empty(), + "Expected lexer to generate at least one error for the following source:\n{source}" + ); + + output + } + + fn lex_source(source: &str) -> LexerOutput { + lex_valid(source, Mode::Module) + } + + fn lex_jupyter_source(source: &str) -> LexerOutput { + lex_valid(source, 
Mode::Ipython) + } + + fn ipython_escape_command_line_continuation_eol(eol: &str) -> LexerOutput { let source = format!("%matplotlib \\{eol} --inline"); lex_jupyter_source(&source) } #[test] fn test_ipython_escape_command_line_continuation_unix_eol() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_eol(UNIX_EOL)); + assert_snapshot!(ipython_escape_command_line_continuation_eol(UNIX_EOL)); } #[test] fn test_ipython_escape_command_line_continuation_mac_eol() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_eol(MAC_EOL)); + assert_snapshot!(ipython_escape_command_line_continuation_eol(MAC_EOL)); } #[test] fn test_ipython_escape_command_line_continuation_windows_eol() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_eol(WINDOWS_EOL)); + assert_snapshot!(ipython_escape_command_line_continuation_eol(WINDOWS_EOL)); } - fn ipython_escape_command_line_continuation_with_eol_and_eof(eol: &str) -> Vec { + fn ipython_escape_command_line_continuation_with_eol_and_eof(eol: &str) -> LexerOutput { let source = format!("%matplotlib \\{eol}"); lex_jupyter_source(&source) } #[test] fn test_ipython_escape_command_line_continuation_with_unix_eol_and_eof() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( + assert_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( UNIX_EOL )); } #[test] fn test_ipython_escape_command_line_continuation_with_mac_eol_and_eof() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( + assert_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( MAC_EOL )); } #[test] fn test_ipython_escape_command_line_continuation_with_windows_eol_and_eof() { - assert_debug_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( + assert_snapshot!(ipython_escape_command_line_continuation_with_eol_and_eof( WINDOWS_EOL )); } @@ -1680,7 +2021,7 @@ mod tests { #[test] fn test_empty_ipython_escape_command() { let source = "%\n%%\n!\n!!\n?\n??\n/\n,\n;"; - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } #[test] @@ -1701,7 +2042,7 @@ mod tests { !ls " .trim(); - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } #[test] @@ -1726,7 +2067,7 @@ mod tests { %%foo??? !pwd?" .trim(); - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } #[test] @@ -1736,7 +2077,7 @@ if True: %matplotlib \ --inline" .trim(); - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } #[test] @@ -1748,13 +2089,13 @@ bar = %timeit a % 3 baz = %matplotlib \ inline" .trim(); - assert_debug_snapshot!(lex_jupyter_source(source)); + assert_snapshot!(lex_jupyter_source(source)); } - fn assert_no_ipython_escape_command(tokens: &[Spanned]) { - for (tok, _) in tokens { - if let Tok::IpyEscapeCommand { .. 
} = tok { - panic!("Unexpected escape command token: {tok:?}") + fn assert_no_ipython_escape_command(tokens: &[TestToken]) { + for token in tokens { + if matches!(token.kind, TokenKind::IpyEscapeCommand) { + panic!("Unexpected escape command token at {:?}", token.range) } } } @@ -1772,147 +2113,153 @@ foo = ,func def f(arg=%timeit a = b): pass" .trim(); - let tokens = lex_jupyter_source(source); - assert_no_ipython_escape_command(&tokens); + let output = lex(source, Mode::Ipython); + assert!(output.errors.is_empty()); + assert_no_ipython_escape_command(&output.tokens); } #[test] fn test_numbers() { let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j 000 0x995DC9BBDF1939FA 0x995DC9BBDF1939FA995DC9BBDF1939FA"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_invalid_leading_zero_small() { let source = "025"; - - let lexer = lex(source, Mode::Module); - let tokens = lexer.collect::, LexicalError>>(); - assert_debug_snapshot!(tokens); + assert_snapshot!(lex_invalid(source, Mode::Module)); } #[test] fn test_invalid_leading_zero_big() { let source = "0252222222222222522222222222225222222222222252222222222222522222222222225222222222222"; - - let lexer = lex(source, Mode::Module); - let tokens = lexer.collect::, LexicalError>>(); - assert_debug_snapshot!(tokens); + assert_snapshot!(lex_invalid(source, Mode::Module)); } #[test] fn test_line_comment_long() { let source = "99232 # foo".to_string(); - assert_debug_snapshot!(lex_source(&source)); + assert_snapshot!(lex_source(&source)); } #[test] fn test_line_comment_whitespace() { let source = "99232 # ".to_string(); - assert_debug_snapshot!(lex_source(&source)); + assert_snapshot!(lex_source(&source)); } #[test] fn test_line_comment_single_whitespace() { let source = "99232 # ".to_string(); - assert_debug_snapshot!(lex_source(&source)); + assert_snapshot!(lex_source(&source)); } #[test] fn test_line_comment_empty() { let source = "99232 #".to_string(); - assert_debug_snapshot!(lex_source(&source)); + assert_snapshot!(lex_source(&source)); } - fn comment_until_eol(eol: &str) -> Vec { + fn comment_until_eol(eol: &str) -> LexerOutput { let source = format!("123 # Foo{eol}456"); lex_source(&source) } #[test] fn test_comment_until_unix_eol() { - assert_debug_snapshot!(comment_until_eol(UNIX_EOL)); + assert_snapshot!(comment_until_eol(UNIX_EOL)); } #[test] fn test_comment_until_mac_eol() { - assert_debug_snapshot!(comment_until_eol(MAC_EOL)); + assert_snapshot!(comment_until_eol(MAC_EOL)); } #[test] fn test_comment_until_windows_eol() { - assert_debug_snapshot!(comment_until_eol(WINDOWS_EOL)); + assert_snapshot!(comment_until_eol(WINDOWS_EOL)); } #[test] fn test_assignment() { let source = r"a_variable = 99 + 2-0"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } - fn indentation_with_eol(eol: &str) -> Vec { + fn indentation_with_eol(eol: &str) -> LexerOutput { let source = format!("def foo():{eol} return 99{eol}{eol}"); lex_source(&source) } #[test] fn test_indentation_with_unix_eol() { - assert_debug_snapshot!(indentation_with_eol(UNIX_EOL)); + assert_snapshot!(indentation_with_eol(UNIX_EOL)); } #[test] fn test_indentation_with_mac_eol() { - assert_debug_snapshot!(indentation_with_eol(MAC_EOL)); + assert_snapshot!(indentation_with_eol(MAC_EOL)); } #[test] fn test_indentation_with_windows_eol() { - assert_debug_snapshot!(indentation_with_eol(WINDOWS_EOL)); + assert_snapshot!(indentation_with_eol(WINDOWS_EOL)); } - fn 
double_dedent_with_eol(eol: &str) -> Vec { + fn double_dedent_with_eol(eol: &str) -> LexerOutput { let source = format!("def foo():{eol} if x:{eol}{eol} return 99{eol}{eol}"); lex_source(&source) } #[test] fn test_double_dedent_with_unix_eol() { - assert_debug_snapshot!(double_dedent_with_eol(UNIX_EOL)); + assert_snapshot!(double_dedent_with_eol(UNIX_EOL)); } #[test] fn test_double_dedent_with_mac_eol() { - assert_debug_snapshot!(double_dedent_with_eol(MAC_EOL)); + assert_snapshot!(double_dedent_with_eol(MAC_EOL)); } #[test] fn test_double_dedent_with_windows_eol() { - assert_debug_snapshot!(double_dedent_with_eol(WINDOWS_EOL)); + assert_snapshot!(double_dedent_with_eol(WINDOWS_EOL)); } - fn double_dedent_with_tabs_eol(eol: &str) -> Vec { + fn double_dedent_with_tabs_eol(eol: &str) -> LexerOutput { let source = format!("def foo():{eol}\tif x:{eol}{eol}\t\t return 99{eol}{eol}"); lex_source(&source) } #[test] fn test_double_dedent_with_tabs_unix_eol() { - assert_debug_snapshot!(double_dedent_with_tabs_eol(UNIX_EOL)); + assert_snapshot!(double_dedent_with_tabs_eol(UNIX_EOL)); } #[test] fn test_double_dedent_with_tabs_mac_eol() { - assert_debug_snapshot!(double_dedent_with_tabs_eol(MAC_EOL)); + assert_snapshot!(double_dedent_with_tabs_eol(MAC_EOL)); } #[test] fn test_double_dedent_with_tabs_windows_eol() { - assert_debug_snapshot!(double_dedent_with_tabs_eol(WINDOWS_EOL)); + assert_snapshot!(double_dedent_with_tabs_eol(WINDOWS_EOL)); } - fn newline_in_brackets_eol(eol: &str) -> Vec { + #[test] + fn dedent_after_whitespace() { + let source = "\ +if first: + if second: + pass + foo +"; + assert_snapshot!(lex_source(source)); + } + + fn newline_in_brackets_eol(eol: &str) -> LexerOutput { let source = r"x = [ 1,2 @@ -1929,17 +2276,17 @@ def f(arg=%timeit a = b): #[test] fn test_newline_in_brackets_unix_eol() { - assert_debug_snapshot!(newline_in_brackets_eol(UNIX_EOL)); + assert_snapshot!(newline_in_brackets_eol(UNIX_EOL)); } #[test] fn test_newline_in_brackets_mac_eol() { - assert_debug_snapshot!(newline_in_brackets_eol(MAC_EOL)); + assert_snapshot!(newline_in_brackets_eol(MAC_EOL)); } #[test] fn test_newline_in_brackets_windows_eol() { - assert_debug_snapshot!(newline_in_brackets_eol(WINDOWS_EOL)); + assert_snapshot!(newline_in_brackets_eol(WINDOWS_EOL)); } #[test] @@ -1951,55 +2298,57 @@ def f(arg=%timeit a = b): 'c' \ 'd' )"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_logical_newline_line_comment() { let source = "#Hello\n#World\n"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_operators() { let source = "//////=/ /"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_string() { let source = r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\'' '\420' '\200\0a'"#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } - fn string_continuation_with_eol(eol: &str) -> Vec { + fn string_continuation_with_eol(eol: &str) -> LexerOutput { let source = format!("\"abc\\{eol}def\""); lex_source(&source) } #[test] fn test_string_continuation_with_unix_eol() { - assert_debug_snapshot!(string_continuation_with_eol(UNIX_EOL)); + assert_snapshot!(string_continuation_with_eol(UNIX_EOL)); } #[test] fn test_string_continuation_with_mac_eol() { - assert_debug_snapshot!(string_continuation_with_eol(MAC_EOL)); + assert_snapshot!(string_continuation_with_eol(MAC_EOL)); } #[test] fn 
test_string_continuation_with_windows_eol() { - assert_debug_snapshot!(string_continuation_with_eol(WINDOWS_EOL)); + assert_snapshot!(string_continuation_with_eol(WINDOWS_EOL)); } #[test] fn test_escape_unicode_name() { let source = r#""\N{EN SPACE}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } - fn get_tokens_only(source: &str) -> Vec { - lex_source(source).into_iter().map(|(tok, _)| tok).collect() + fn get_tokens_only(source: &str) -> Vec { + let output = lex(source, Mode::Module); + assert!(output.errors.is_empty()); + output.tokens.into_iter().map(|token| token.kind).collect() } #[test] @@ -2009,24 +2358,24 @@ def f(arg=%timeit a = b): assert_eq!(get_tokens_only(source1), get_tokens_only(source2)); } - fn triple_quoted_eol(eol: &str) -> Vec { + fn triple_quoted_eol(eol: &str) -> LexerOutput { let source = format!("\"\"\"{eol} test string{eol} \"\"\""); lex_source(&source) } #[test] fn test_triple_quoted_unix_eol() { - assert_debug_snapshot!(triple_quoted_eol(UNIX_EOL)); + assert_snapshot!(triple_quoted_eol(UNIX_EOL)); } #[test] fn test_triple_quoted_mac_eol() { - assert_debug_snapshot!(triple_quoted_eol(MAC_EOL)); + assert_snapshot!(triple_quoted_eol(MAC_EOL)); } #[test] fn test_triple_quoted_windows_eol() { - assert_debug_snapshot!(triple_quoted_eol(WINDOWS_EOL)); + assert_snapshot!(triple_quoted_eol(WINDOWS_EOL)); } // This test case is to just make sure that the lexer doesn't go into @@ -2034,125 +2383,110 @@ def f(arg=%timeit a = b): #[test] fn test_infinite_loop() { let source = "[1"; - let _ = lex(source, Mode::Module).collect::>(); + lex_invalid(source, Mode::Module); } /// Emoji identifiers are a non-standard python feature and are not supported by our lexer. #[test] fn test_emoji_identifier() { let source = "🐦"; - - let lexed: Vec<_> = lex(source, Mode::Module).collect(); - - match lexed.as_slice() { - [Err(error)] => { - assert_eq!( - error.error(), - &LexicalErrorType::UnrecognizedToken { tok: '🐦' } - ); - } - result => panic!("Expected an error token but found {result:?}"), - } + assert_snapshot!(lex_invalid(source, Mode::Module)); } #[test] fn tet_too_low_dedent() { - let tokens: Vec<_> = lex( - "if True: + let source = "if True: pass - pass", - Mode::Module, - ) - .collect(); - assert_debug_snapshot!(tokens); + pass"; + assert_snapshot!(lex_invalid(source, Mode::Module)); } #[test] fn test_empty_fstrings() { let source = r#"f"" "" F"" f'' '' f"""""" f''''''"#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_prefix() { let source = r#"f"" F"" rf"" rF"" Rf"" RF"" fr"" Fr"" fR"" FR"""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring() { let source = r#"f"normal {foo} {{another}} {bar} {{{three}}}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_parentheses() { let source = r#"f"{}" f"{{}}" f" {}" f"{{{}}}" f"{{{{}}}}" f" {} {{}} {{{}}} {{{{}}}} ""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } - fn fstring_single_quote_escape_eol(eol: &str) -> Vec { + fn fstring_single_quote_escape_eol(eol: &str) -> LexerOutput { let source = format!(r"f'text \{eol} more text'"); lex_source(&source) } #[test] fn test_fstring_single_quote_escape_unix_eol() { - assert_debug_snapshot!(fstring_single_quote_escape_eol(UNIX_EOL)); + assert_snapshot!(fstring_single_quote_escape_eol(UNIX_EOL)); } #[test] fn 
test_fstring_single_quote_escape_mac_eol() { - assert_debug_snapshot!(fstring_single_quote_escape_eol(MAC_EOL)); + assert_snapshot!(fstring_single_quote_escape_eol(MAC_EOL)); } #[test] fn test_fstring_single_quote_escape_windows_eol() { - assert_debug_snapshot!(fstring_single_quote_escape_eol(WINDOWS_EOL)); + assert_snapshot!(fstring_single_quote_escape_eol(WINDOWS_EOL)); } #[test] fn test_fstring_escape() { let source = r#"f"\{x:\"\{x}} \"\"\ end""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_escape_braces() { let source = r"f'\{foo}' f'\\{foo}' f'\{{foo}}' f'\\{{foo}}'"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_escape_raw() { let source = r#"rf"\{x:\"\{x}} \"\"\ end""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_named_unicode() { let source = r#"f"\N{BULLET} normal \Nope \N""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_named_unicode_raw() { let source = r#"rf"\N{BULLET} normal""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_with_named_expression() { let source = r#"f"{x:=10} {(x:=10)} {x,{y:=10}} {[x:=10]}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_with_format_spec() { let source = r#"f"{foo:} {x=!s:.3f} {x:.{y}f} {'':*^{1:{1}}} {x:{{1}.pop()}}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2175,19 +2509,19 @@ f'__{ b }__' "; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_conversion() { let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_nested() { let source = r#"f"foo {f"bar {x + f"{wow}"}"} baz" f'foo {f'bar'} some {f"another"}'"#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2197,7 +2531,7 @@ f'__{ * y } second""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2210,7 +2544,7 @@ hello hello ''' f"some {f"""multiline allowed {x}"""} string""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2220,13 +2554,13 @@ allowed {x}"""} string""#; x } # not a comment """"#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_with_ipy_escape_command() { let source = r#"f"foo {!pwd} bar""#; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2236,13 +2570,13 @@ f"{lambda x:{x}}" f"{(lambda x:{x})}" "# .trim(); - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] fn test_fstring_with_nul_char() { let source = r"f'\0'"; - assert_debug_snapshot!(lex_source(source)); + assert_snapshot!(lex_source(source)); } #[test] @@ -2250,18 +2584,18 @@ f"{(lambda x:{x})}" let source = r"match foo: case bar: pass"; - assert_debug_snapshot!(lex_jupyter_source(source)); - } - - fn lex_error(source: &str) -> LexicalError { - match lex(source, Mode::Module).find_map(Result::err) { - Some(err) => err, - _ => panic!("Expected at least one error"), - } + 
assert_snapshot!(lex_jupyter_source(source)); } fn lex_fstring_error(source: &str) -> FStringErrorType { - match lex_error(source).into_error() { + let output = lex(source, Mode::Module); + match output + .errors + .into_iter() + .next() + .expect("lexer should give at least one error") + .into_error() + { LexicalErrorType::FStringError(error) => error, err => panic!("Expected FStringError: {err:?}"), } diff --git a/crates/ruff_python_parser/src/lexer/cursor.rs b/crates/ruff_python_parser/src/lexer/cursor.rs index 6dd8e63d70ad8..e7cd633920aa5 100644 --- a/crates/ruff_python_parser/src/lexer/cursor.rs +++ b/crates/ruff_python_parser/src/lexer/cursor.rs @@ -1,18 +1,26 @@ -use ruff_text_size::{TextLen, TextSize}; use std::str::Chars; +use ruff_text_size::{TextLen, TextSize}; + pub(crate) const EOF_CHAR: char = '\0'; +/// A cursor represents a pointer in the source code. #[derive(Clone, Debug)] -pub(super) struct Cursor<'a> { - chars: Chars<'a>, +pub(super) struct Cursor<'src> { + /// An iterator over the [`char`]'s of the source code. + chars: Chars<'src>, + + /// Length of the source code. This is used as a marker to indicate the start of the current + /// token which is being lexed. source_length: TextSize, + + /// Stores the previous character for debug assertions. #[cfg(debug_assertions)] prev_char: char, } -impl<'a> Cursor<'a> { - pub(crate) fn new(source: &'a str) -> Self { +impl<'src> Cursor<'src> { + pub(crate) fn new(source: &'src str) -> Self { Self { source_length: source.text_len(), chars: source.chars(), @@ -21,14 +29,14 @@ impl<'a> Cursor<'a> { } } - /// Returns the previous token. Useful for debug assertions. + /// Returns the previous character. Useful for debug assertions. #[cfg(debug_assertions)] pub(super) const fn previous(&self) -> char { self.prev_char } /// Peeks the next character from the input stream without consuming it. - /// Returns [`EOF_CHAR`] if the file is at the end of the file. + /// Returns [`EOF_CHAR`] if the position is past the end of the file. pub(super) fn first(&self) -> char { self.chars.clone().next().unwrap_or(EOF_CHAR) } @@ -42,29 +50,44 @@ impl<'a> Cursor<'a> { } /// Returns the remaining text to lex. - pub(super) fn rest(&self) -> &'a str { + /// + /// Use [`Cursor::text_len`] to get the length of the remaining text. + pub(super) fn rest(&self) -> &'src str { self.chars.as_str() } + /// Returns the length of the remaining text. + /// + /// Use [`Cursor::rest`] to get the remaining text. // SAFETY: The `source.text_len` call in `new` would panic if the string length is larger than a `u32`. #[allow(clippy::cast_possible_truncation)] pub(super) fn text_len(&self) -> TextSize { TextSize::new(self.chars.as_str().len() as u32) } + /// Returns the length of the current token length. + /// + /// This is to be used after setting the start position of the token using + /// [`Cursor::start_token`]. pub(super) fn token_len(&self) -> TextSize { self.source_length - self.text_len() } + /// Mark the current position of the cursor as the start of the token which is going to be + /// lexed. + /// + /// Use [`Cursor::token_len`] to get the length of the lexed token. pub(super) fn start_token(&mut self) { self.source_length = self.text_len(); } + /// Returns `true` if the cursor is at the end of file. pub(super) fn is_eof(&self) -> bool { self.chars.as_str().is_empty() } - /// Consumes the next character + /// Moves the cursor to the next character, returning the previous character. + /// Returns [`None`] if there is no next character. 
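// Illustrative sketch (not part of the patch): how the `Cursor` protocol
// documented above is meant to be driven. `start_token` marks the token
// start, `first` peeks without consuming, `bump` consumes, and `token_len`
// measures the finished token. The helper name `lex_identifier_len` is
// hypothetical, and since the methods are `pub(super)`, such a helper could
// only live inside the lexer module.
fn lex_identifier_len(cursor: &mut Cursor) -> Option<TextSize> {
    cursor.start_token(); // mark the current position as the token start
    let first = cursor.bump()?; // consume the first character
    if first.is_alphabetic() {
        // `first()` only peeks, so the loop stops at the first character
        // that doesn't belong to the identifier.
        while cursor.first().is_alphanumeric() {
            cursor.bump();
        }
    }
    Some(cursor.token_len()) // length measured from the `start_token` mark
}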
pub(super) fn bump(&mut self) -> Option { let prev = self.chars.next()?; diff --git a/crates/ruff_python_parser/src/lexer/fstring.rs b/crates/ruff_python_parser/src/lexer/fstring.rs index 16dae1222d1ec..7b702a77b7269 100644 --- a/crates/ruff_python_parser/src/lexer/fstring.rs +++ b/crates/ruff_python_parser/src/lexer/fstring.rs @@ -1,9 +1,11 @@ -use ruff_python_ast::{AnyStringFlags, StringFlags}; +use ruff_python_ast::StringFlags; + +use super::TokenFlags; /// The context representing the current f-string that the lexer is in. -#[derive(Debug)] +#[derive(Clone, Debug)] pub(crate) struct FStringContext { - flags: AnyStringFlags, + flags: TokenFlags, /// The level of nesting for the lexer when it entered the current f-string. /// The nesting level includes all kinds of parentheses i.e., round, square, @@ -17,8 +19,9 @@ pub(crate) struct FStringContext { } impl FStringContext { - pub(crate) const fn new(flags: AnyStringFlags, nesting: u32) -> Self { - debug_assert!(flags.is_f_string()); + pub(crate) const fn new(flags: TokenFlags, nesting: u32) -> Self { + assert!(flags.is_f_string()); + Self { flags, nesting, @@ -26,8 +29,7 @@ impl FStringContext { } } - pub(crate) const fn flags(&self) -> AnyStringFlags { - debug_assert!(self.flags.is_f_string()); + pub(crate) const fn flags(&self) -> TokenFlags { self.flags } @@ -127,4 +129,15 @@ impl FStrings { pub(crate) fn current_mut(&mut self) -> Option<&mut FStringContext> { self.stack.last_mut() } + + pub(crate) fn checkpoint(&self) -> FStringsCheckpoint { + FStringsCheckpoint(self.stack.clone()) + } + + pub(crate) fn rewind(&mut self, checkpoint: FStringsCheckpoint) { + self.stack = checkpoint.0; + } } + +#[derive(Debug, Clone)] +pub(crate) struct FStringsCheckpoint(Vec); diff --git a/crates/ruff_python_parser/src/lexer/indentation.rs b/crates/ruff_python_parser/src/lexer/indentation.rs index 2b12efab063fb..7125f3a2247a3 100644 --- a/crates/ruff_python_parser/src/lexer/indentation.rs +++ b/crates/ruff_python_parser/src/lexer/indentation.rs @@ -82,8 +82,8 @@ impl Indentation { #[derive(Debug, Copy, Clone, PartialEq)] pub(super) struct UnexpectedIndentation; -// The indentations stack is used to keep track of the current indentation level -// [See Indentation](docs.python.org/3/reference/lexical_analysis.html#indentation). +/// The indentations stack is used to keep track of the current indentation level +/// [See Indentation](docs.python.org/3/reference/lexical_analysis.html#indentation). #[derive(Debug, Clone, Default)] pub(super) struct Indentations { stack: Vec, @@ -124,8 +124,19 @@ impl Indentations { static ROOT: Indentation = Indentation::root(); self.stack.last().unwrap_or(&ROOT) } + + pub(crate) fn checkpoint(&self) -> IndentationsCheckpoint { + IndentationsCheckpoint(self.stack.clone()) + } + + pub(crate) fn rewind(&mut self, checkpoint: IndentationsCheckpoint) { + self.stack = checkpoint.0; + } } +#[derive(Debug, Clone)] +pub(crate) struct IndentationsCheckpoint(Vec); + assert_eq_size!(Indentation, u64); #[cfg(test)] diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index 3795203b9887e..52b436592b92c 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -57,81 +57,37 @@ //! //! - token: This module contains the definition of the tokens that are generated by the lexer. //! - [lexer]: This module contains the lexer and is responsible for generating the tokens. -//! 
- parser: This module contains an interface to the [Program] and is responsible for generating the AST. +//! - parser: This module contains an interface to the [Parsed] and is responsible for generating the AST. //! - mode: This module contains the definition of the different modes that the `ruff_python_parser` can be in. //! -//! # Examples -//! -//! For example, to get a stream of tokens from a given string, one could do this: -//! -//! ``` -//! use ruff_python_parser::{lexer::lex, Mode}; -//! -//! let python_source = r#" -//! def is_odd(i): -//! return bool(i & 1) -//! "#; -//! let mut tokens = lex(python_source, Mode::Module); -//! assert!(tokens.all(|t| t.is_ok())); -//! ``` -//! -//! These tokens can be directly fed into the `ruff_python_parser` to generate an AST: -//! -//! ``` -//! use ruff_python_parser::lexer::lex; -//! use ruff_python_parser::{Mode, parse_tokens}; -//! -//! let python_source = r#" -//! def is_odd(i): -//! return bool(i & 1) -//! "#; -//! let tokens = lex(python_source, Mode::Module); -//! let ast = parse_tokens(tokens.collect(), python_source, Mode::Module); -//! -//! assert!(ast.is_ok()); -//! ``` -//! -//! Alternatively, you can use one of the other `parse_*` functions to parse a string directly without using a specific -//! mode or tokenizing the source beforehand: -//! -//! ``` -//! use ruff_python_parser::parse_suite; -//! -//! let python_source = r#" -//! def is_odd(i): -//! return bool(i & 1) -//! "#; -//! let ast = parse_suite(python_source); -//! -//! assert!(ast.is_ok()); -//! ``` -//! //! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis //! [parsing]: https://en.wikipedia.org/wiki/Parsing //! [lexer]: crate::lexer -use std::iter::FusedIterator; +use std::cell::OnceCell; use std::ops::Deref; -use ruff_python_ast::{Expr, Mod, ModModule, PySourceType, Suite}; -use ruff_text_size::{TextRange, TextSize}; - pub use crate::error::{FStringErrorType, ParseError, ParseErrorType}; -use crate::lexer::{lex, lex_starts_at, LexResult}; -pub use crate::parser::Program; -pub use crate::token::{Tok, TokenKind}; +pub use crate::lexer::Token; +pub use crate::token::TokenKind; + +use crate::parser::Parser; + +use itertools::Itertools; +use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite}; +use ruff_python_trivia::CommentRanges; +use ruff_text_size::{Ranged, TextRange, TextSize}; mod error; pub mod lexer; mod parser; -mod soft_keywords; mod string; mod token; mod token_set; mod token_source; pub mod typing; -/// Parse a full Python program usually consisting of multiple lines. +/// Parse a full Python module usually consisting of multiple lines. /// /// This is a convenience function that can be used to parse a full Python program without having to /// specify the [`Mode`] or the location. It is probably what you want to use most of the time. @@ -141,7 +97,7 @@ pub mod typing; /// For example, parsing a simple function definition and a call to that function: /// /// ``` -/// use ruff_python_parser::parse_program; +/// use ruff_python_parser::parse_module; /// /// let source = r#" /// def foo(): @@ -150,41 +106,15 @@ pub mod typing; /// print(foo()) /// "#; /// -/// let program = parse_program(source); -/// assert!(program.is_ok()); +/// let module = parse_module(source); +/// assert!(module.is_ok()); /// ``` -pub fn parse_program(source: &str) -> Result { - let lexer = lex(source, Mode::Module); - match parse_tokens(lexer.collect(), source, Mode::Module)? 
{
-        Mod::Module(m) => Ok(m),
-        Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
-    }
-}
-
-/// Parse a full Python program into a [`Suite`].
-///
-/// This function is similar to [`parse_program`] except that it returns the module body
-/// instead of the module itself.
-///
-/// # Example
-///
-/// For example, parsing a simple function definition and a call to that function:
-///
-/// ```
-/// use ruff_python_parser::parse_suite;
-///
-/// let source = r#"
-/// def foo():
-///     return 42
-///
-/// print(foo())
-/// "#;
-///
-/// let body = parse_suite(source);
-/// assert!(body.is_ok());
-/// ```
-pub fn parse_suite(source: &str) -> Result<Suite, ParseError> {
-    parse_program(source).map(|m| m.body)
+pub fn parse_module(source: &str) -> Result<Parsed<ModModule>, ParseError> {
+    Parser::new(source, Mode::Module)
+        .parse()
+        .try_into_module()
+        .unwrap()
+        .into_result()
 }
 
 /// Parses a single Python expression.
@@ -202,37 +132,40 @@ pub fn parse_suite(source: &str) -> Result<Suite, ParseError> {
 /// let expr = parse_expression("1 + 2");
 /// assert!(expr.is_ok());
 /// ```
-pub fn parse_expression(source: &str) -> Result<Expr, ParseError> {
-    let lexer = lex(source, Mode::Expression).collect();
-    match parse_tokens(lexer, source, Mode::Expression)? {
-        Mod::Expression(expression) => Ok(*expression.body),
-        Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
-    }
+pub fn parse_expression(source: &str) -> Result<Parsed<ModExpression>, ParseError> {
+    Parser::new(source, Mode::Expression)
+        .parse()
+        .try_into_expression()
+        .unwrap()
+        .into_result()
 }
 
-/// Parses a Python expression from a given location.
+/// Parses a Python expression for the given range in the source.
 ///
-/// This function allows to specify the location of the expression in the source code, other than
+/// This function allows specifying the range of the expression in the source code; other than
 /// that, it behaves exactly like [`parse_expression`].
 ///
 /// # Example
 ///
-/// Parsing a single expression denoting the addition of two numbers, but this time specifying a different,
-/// somewhat silly, location:
+/// Parsing one of the numeric literals that is part of an addition expression:
 ///
 /// ```
-/// use ruff_python_parser::parse_expression_starts_at;
-/// # use ruff_text_size::TextSize;
+/// use ruff_python_parser::parse_expression_range;
+/// # use ruff_text_size::{TextRange, TextSize};
 ///
-/// let expr = parse_expression_starts_at("1 + 2", TextSize::from(400));
-/// assert!(expr.is_ok());
+/// let parsed = parse_expression_range("11 + 22 + 33", TextRange::new(TextSize::new(5), TextSize::new(7)));
+/// assert!(parsed.is_ok());
/// ```
-pub fn parse_expression_starts_at(source: &str, offset: TextSize) -> Result<Expr, ParseError> {
-    let lexer = lex_starts_at(source, Mode::Module, offset).collect();
-    match parse_tokens(lexer, source, Mode::Expression)? {
-        Mod::Expression(expression) => Ok(*expression.body),
-        Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
-    }
+pub fn parse_expression_range(
+    source: &str,
+    range: TextRange,
+) -> Result<Parsed<ModExpression>, ParseError> {
+    let source = &source[..range.end().to_usize()];
+    Parser::new_starts_at(source, Mode::Expression, range.start())
+        .parse()
+        .try_into_expression()
+        .unwrap()
+        .into_result()
 }
 
 /// Parse the given Python source code using the specified [`Mode`].
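// A hedged usage sketch of `parse_expression_range` as documented above:
// lexing starts at `range.start()` and the source is truncated at
// `range.end()`, so the parsed expression keeps its position in the
// original source. `expr()` is the `Parsed<ModExpression>` accessor
// defined further down in this file.
use ruff_python_parser::parse_expression_range;
use ruff_text_size::{Ranged, TextRange, TextSize};

fn main() {
    let source = "11 + 22 + 33";
    let range = TextRange::new(TextSize::new(5), TextSize::new(7)); // the `22` literal
    let parsed = parse_expression_range(source, range).expect("`22` is a valid expression");
    assert_eq!(&source[parsed.expr().range()], "22");
}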
@@ -249,8 +182,8 @@ pub fn parse_expression_starts_at(source: &str, offset: TextSize) -> Result<Expr, ParseError> {
-pub fn parse(source: &str, mode: Mode) -> Result<Mod, ParseError> {
-    let lxr = lexer::lex(source, mode);
-    parse_tokens(lxr.collect(), source, mode)
+pub fn parse(source: &str, mode: Mode) -> Result<Parsed<Mod>, ParseError> {
+    parse_unchecked(source, mode).into_result()
 }
 
-/// Parse the given Python source code using the specified [`Mode`] and [`TextSize`].
-///
-/// This function allows to specify the location of the source code, other than
-/// that, it behaves exactly like [`parse`].
-///
-/// # Example
-///
-/// ```
-/// # use ruff_text_size::TextSize;
-/// use ruff_python_parser::{Mode, parse_starts_at};
-///
-/// let source = r#"
-/// def fib(i):
-///    a, b = 0, 1
-///    for _ in range(i):
-///       a, b = b, a + b
-///    return a
+/// Parse the given Python source code using the specified [`Mode`].
 ///
-/// print(fib(42))
-/// "#;
-/// let program = parse_starts_at(source, Mode::Module, TextSize::from(0));
-/// assert!(program.is_ok());
-/// ```
-pub fn parse_starts_at(source: &str, mode: Mode, offset: TextSize) -> Result<Mod, ParseError> {
-    let lxr = lexer::lex_starts_at(source, mode, offset);
-    parse_tokens(lxr.collect(), source, mode)
+/// This is the same as the [`parse`] function, except that it doesn't check for any [`ParseError`]
+/// and returns the [`Parsed`] as is.
+pub fn parse_unchecked(source: &str, mode: Mode) -> Parsed<Mod> {
+    Parser::new(source, mode).parse()
 }
 
-/// Parse an iterator of [`LexResult`]s using the specified [`Mode`].
-///
-/// This could allow you to perform some preprocessing on the tokens before parsing them.
-///
-/// # Example
-///
-/// As an example, instead of parsing a string, we can parse a list of tokens after we generate
-/// them using the [`lexer::lex`] function:
-///
-/// ```
-/// use ruff_python_parser::lexer::lex;
-/// use ruff_python_parser::{Mode, parse_tokens};
-///
-/// let source = "1 + 2";
-/// let tokens = lex(source, Mode::Expression);
-/// let expr = parse_tokens(tokens.collect(), source, Mode::Expression);
-/// assert!(expr.is_ok());
-/// ```
-pub fn parse_tokens(tokens: Vec<LexResult>, source: &str, mode: Mode) -> Result<Mod, ParseError> {
-    let program = Program::parse_tokens(source, tokens, mode);
-    if program.is_valid() {
-        Ok(program.into_ast())
-    } else {
-        Err(program.into_errors().into_iter().next().unwrap())
-    }
+/// Parse the given Python source code using the specified [`PySourceType`].
+pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed<ModModule> {
+    // SAFETY: Safe because `PySourceType` always parses to a `ModModule`
+    Parser::new(source, source_type.as_mode())
+        .parse()
+        .try_into_module()
+        .unwrap()
 }
 
-/// Tokens represents a vector of [`LexResult`].
-///
-/// This should only include tokens up to and including the first error. This struct is created
-/// by the [`tokenize`] function.
+/// Represents the parsed source code.
 #[derive(Debug, Clone)]
-pub struct Tokens(Vec<LexResult>);
+pub struct Parsed<T> {
+    syntax: T,
+    tokens: Tokens,
+    errors: Vec<ParseError>,
+    comment_ranges: CommentRanges,
+}
 
-impl Tokens {
-    /// Returns an iterator over the [`TokenKind`] and the range corresponding to the tokens.
-    pub fn kinds(&self) -> TokenKindIter {
-        TokenKindIter::new(&self.0)
+impl<T> Parsed<T> {
+    /// Returns the syntax node represented by this parsed output.
+    pub fn syntax(&self) -> &T {
+        &self.syntax
     }
 
-    /// Consumes the [`Tokens`], returning the underlying vector of [`LexResult`].
-    pub fn into_inner(self) -> Vec<LexResult> {
-        self.0
+    /// Returns all the tokens for the parsed output.
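// A hedged sketch contrasting the two entry points above: `parse` turns
// the first syntax error into an `Err`, while `parse_unchecked` always
// returns a `Parsed` so the recovered AST and its errors can be inspected
// together (via the accessors defined just below).
use ruff_python_parser::{parse, parse_unchecked, Mode};

fn main() {
    assert!(parse("1 + 2", Mode::Expression).is_ok());

    let recovered = parse_unchecked("x = (1", Mode::Module); // unclosed `(`
    assert!(!recovered.errors().is_empty()); // the error is recorded, not returned
    let _ast = recovered.syntax(); // the recovered AST is still available
}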
+    pub fn tokens(&self) -> &Tokens {
+        &self.tokens
     }
 
-impl Deref for Tokens {
-    type Target = [LexResult];
+    /// Returns a list of syntax errors found during parsing.
+    pub fn errors(&self) -> &[ParseError] {
+        &self.errors
+    }
 
-    fn deref(&self) -> &Self::Target {
-        &self.0
+    /// Returns the comment ranges for the parsed output.
+    pub fn comment_ranges(&self) -> &CommentRanges {
+        &self.comment_ranges
     }
-}
 
-/// An iterator over the [`TokenKind`] and the corresponding range.
-///
-/// This struct is created by the [`Tokens::kinds`] method.
-#[derive(Clone, Default)]
-pub struct TokenKindIter<'a> {
-    inner: std::iter::Flatten<std::slice::Iter<'a, LexResult>>,
-}
+    /// Consumes the [`Parsed`] output and returns the contained syntax node.
+    pub fn into_syntax(self) -> T {
+        self.syntax
+    }
 
-impl<'a> TokenKindIter<'a> {
-    /// Create a new iterator from a slice of [`LexResult`].
-    pub fn new(tokens: &'a [LexResult]) -> Self {
-        Self {
-            inner: tokens.iter().flatten(),
+    /// Consumes the [`Parsed`] output and returns a list of syntax errors found during parsing.
+    pub fn into_errors(self) -> Vec<ParseError> {
+        self.errors
+    }
+
+    /// Returns `true` if the parsed source code is valid i.e., it has no syntax errors.
+    pub fn is_valid(&self) -> bool {
+        self.errors.is_empty()
+    }
+
+    /// Returns the [`Parsed`] output as a [`Result`], returning [`Ok`] if it has no syntax errors,
+    /// or [`Err`] containing the first [`ParseError`] encountered.
+    pub fn as_result(&self) -> Result<&Parsed<T>, &ParseError> {
+        if let [error, ..] = self.errors() {
+            Err(error)
+        } else {
+            Ok(self)
         }
     }
 
-    /// Return the next value without advancing the iterator.
-    pub fn peek(&mut self) -> Option<(TokenKind, TextRange)> {
-        self.clone().next()
+    /// Consumes the [`Parsed`] output and returns a [`Result`] which is [`Ok`] if it has no syntax
+    /// errors, or [`Err`] containing the first [`ParseError`] encountered.
+    pub(crate) fn into_result(self) -> Result<Parsed<T>, ParseError> {
+        if self.is_valid() {
+            Ok(self)
+        } else {
+            Err(self.into_errors().into_iter().next().unwrap())
+        }
     }
 }
 
-impl Iterator for TokenKindIter<'_> {
-    type Item = (TokenKind, TextRange);
+impl Parsed<Mod> {
+    /// Attempts to convert the [`Parsed<Mod>`] into a [`Parsed<ModModule>`].
+    ///
+    /// This method checks if the `syntax` field of the output is a [`Mod::Module`]. If it is, the
+    /// method returns [`Some(Parsed<ModModule>)`] with the contained module. Otherwise, it
+    /// returns [`None`].
+    ///
+    /// [`Some(Parsed<ModModule>)`]: Some
+    fn try_into_module(self) -> Option<Parsed<ModModule>> {
+        match self.syntax {
+            Mod::Module(module) => Some(Parsed {
+                syntax: module,
+                tokens: self.tokens,
+                errors: self.errors,
+                comment_ranges: self.comment_ranges,
+            }),
+            Mod::Expression(_) => None,
+        }
+    }
 
-    fn next(&mut self) -> Option<Self::Item> {
-        let &(ref tok, range) = self.inner.next()?;
-        Some((TokenKind::from_token(tok), range))
+    /// Attempts to convert the [`Parsed<Mod>`] into a [`Parsed<ModExpression>`].
+    ///
+    /// This method checks if the `syntax` field of the output is a [`Mod::Expression`]. If it is,
+    /// the method returns [`Some(Parsed<ModExpression>)`] with the contained expression.
+    /// Otherwise, it returns [`None`].
+    ///
+    /// [`Some(Parsed<ModExpression>)`]: Some
+    fn try_into_expression(self) -> Option<Parsed<ModExpression>> {
+        match self.syntax {
+            Mod::Module(_) => None,
+            Mod::Expression(expression) => Some(Parsed {
+                syntax: expression,
+                tokens: self.tokens,
+                errors: self.errors,
+                comment_ranges: self.comment_ranges,
+            }),
+        }
+    }
 }
 
-impl FusedIterator for TokenKindIter<'_> {}
+impl Parsed<ModModule> {
+    /// Returns the module body contained in this parsed output as a [`Suite`].
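// A small sketch of narrowing a generic `Parsed<Mod>` by hand; the
// `try_into_module` conversion above does the equivalent internally.
use ruff_python_ast::Mod;
use ruff_python_parser::{parse_unchecked, Mode};

fn main() {
    let parsed = parse_unchecked("pass", Mode::Module);
    match parsed.into_syntax() {
        Mod::Module(module) => assert_eq!(module.body.len(), 1),
        Mod::Expression(_) => unreachable!("Mode::Module always produces Mod::Module"),
    }
}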
+    pub fn suite(&self) -> &Suite {
+        &self.syntax.body
+    }
 
-impl DoubleEndedIterator for TokenKindIter<'_> {
-    fn next_back(&mut self) -> Option<Self::Item> {
-        let &(ref tok, range) = self.inner.next_back()?;
-        Some((TokenKind::from_token(tok), range))
+    /// Consumes the [`Parsed`] output and returns the module body as a [`Suite`].
+    pub fn into_suite(self) -> Suite {
+        self.syntax.body
     }
 }
 
-/// Collect tokens up to and including the first error.
-pub fn tokenize(contents: &str, mode: Mode) -> Tokens {
-    let mut tokens: Vec<LexResult> = allocate_tokens_vec(contents);
-    for tok in lexer::lex(contents, mode) {
-        let is_err = tok.is_err();
-        tokens.push(tok);
-        if is_err {
-            break;
-        }
+impl Parsed<ModExpression> {
+    /// Returns the expression contained in this parsed output.
+    pub fn expr(&self) -> &Expr {
+        &self.syntax.body
     }
-    Tokens(tokens)
+
+    /// Consumes the [`Parsed`] output and returns the contained [`Expr`].
+    pub fn into_expr(self) -> Expr {
+        *self.syntax.body
+    }
 }
 
-/// Tokenizes all tokens.
-///
-/// It differs from [`tokenize`] in that it tokenizes all tokens and doesn't stop
-/// after the first `Err`.
-pub fn tokenize_all(contents: &str, mode: Mode) -> Vec<LexResult> {
-    let mut tokens = allocate_tokens_vec(contents);
-    for token in lexer::lex(contents, mode) {
-        tokens.push(token);
-    }
-    tokens
+/// `Tokens` represents a vector of lexed [`Token`]s.
+#[derive(Debug, Clone)]
+pub struct Tokens {
+    raw: Vec<Token>,
+
+    /// Index of the first [`TokenKind::Unknown`] token or the length of the token vector.
+    first_unknown_or_len: OnceCell<usize>,
 }
 
-/// Allocates a [`Vec`] with an approximated capacity to fit all tokens
-/// of `contents`.
-///
-/// See [#9546](https://github.com/astral-sh/ruff/pull/9546) for a more detailed explanation.
-pub fn allocate_tokens_vec(contents: &str) -> Vec<LexResult> {
-    Vec::with_capacity(approximate_tokens_lower_bound(contents))
+impl Tokens {
+    pub(crate) fn new(tokens: Vec<Token>) -> Tokens {
+        Tokens {
+            raw: tokens,
+            first_unknown_or_len: OnceCell::new(),
+        }
+    }
+
+    /// Returns a slice of tokens up to (and excluding) the first [`TokenKind::Unknown`] token or
+    /// all the tokens if there is none.
+    pub fn up_to_first_unknown(&self) -> &[Token] {
+        let end = *self.first_unknown_or_len.get_or_init(|| {
+            self.raw
+                .iter()
+                .find_position(|token| token.kind() == TokenKind::Unknown)
+                .map(|(idx, _)| idx)
+                .unwrap_or_else(|| self.raw.len())
+        });
+        &self.raw[..end]
+    }
+
+    /// Returns a slice of [`Token`] that are within the given `range`.
+    ///
+    /// The start and end offset of the given range should be either:
+    /// 1. Token boundary
+    /// 2. Gap between the tokens
+    ///
+    /// For example, considering the following tokens and their corresponding range:
+    ///
+    /// | Token               | Range     |
+    /// |---------------------|-----------|
+    /// | `Def`               | `0..3`    |
+    /// | `Name`              | `4..7`    |
+    /// | `Lpar`              | `7..8`    |
+    /// | `Rpar`              | `8..9`    |
+    /// | `Colon`             | `9..10`   |
+    /// | `Newline`           | `10..11`  |
+    /// | `Comment`           | `15..24`  |
+    /// | `NonLogicalNewline` | `24..25`  |
+    /// | `Indent`            | `25..29`  |
+    /// | `Pass`              | `29..33`  |
+    ///
+    /// Here, for (1) a token boundary is considered either the start or end offset of any of the
+    /// above tokens. For (2), the gap would be any offset between the `Newline` and `Comment`
+    /// tokens, which are 12, 13, and 14.
+ /// + /// Examples: + /// 1) `4..10` would give `Name`, `Lpar`, `Rpar`, `Colon` + /// 2) `11..25` would give `Comment`, `NonLogicalNewline` + /// 3) `12..25` would give same as (2) and offset 12 is in the "gap" + /// 4) `9..12` would give `Colon`, `Newline` and offset 12 is in the "gap" + /// 5) `18..27` would panic because both the start and end offset is within a token + /// + /// ## Note + /// + /// The returned slice can contain the [`TokenKind::Unknown`] token if there was a lexical + /// error encountered within the given range. + /// + /// # Panics + /// + /// If either the start or end offset of the given range is within a token range. + pub fn in_range(&self, range: TextRange) -> &[Token] { + let tokens_after_start = self.after(range.start()); + + match tokens_after_start.binary_search_by_key(&range.end(), Ranged::end) { + Ok(idx) => { + // If we found the token with the end offset, that token should be included in the + // return slice. + &tokens_after_start[..=idx] + } + Err(idx) => { + if let Some(token) = tokens_after_start.get(idx) { + // If it's equal to the start offset, then it's at a token boundary which is + // valid. If it's less than the start offset, then it's in the gap between the + // tokens which is valid as well. + assert!( + range.end() <= token.start(), + "End offset {:?} is inside a token range {:?}", + range.end(), + token.range() + ); + } + + // This index is where the token with the offset _could_ be, so that token should + // be excluded from the return slice. + &tokens_after_start[..idx] + } + } + } + + /// Returns a slice of tokens after the given [`TextSize`] offset. + /// + /// If the given offset is between two tokens, the returned slice will start from the following + /// token. In other words, if the offset is between the end of previous token and start of next + /// token, the returned slice will start from the next token. + /// + /// # Panics + /// + /// If the given offset is inside a token range. + pub fn after(&self, offset: TextSize) -> &[Token] { + match self.binary_search_by(|token| token.start().cmp(&offset)) { + Ok(idx) => &self[idx..], + Err(idx) => { + // We can't use `saturating_sub` here because a file could contain a BOM header, in + // which case the token starts at offset 3 for UTF-8 encoded file content. + if idx > 0 { + if let Some(prev) = self.get(idx - 1) { + // If it's equal to the end offset, then it's at a token boundary which is + // valid. If it's greater than the end offset, then it's in the gap between + // the tokens which is valid as well. + assert!( + offset >= prev.end(), + "Offset {:?} is inside a token range {:?}", + offset, + prev.range() + ); + } + } + + &self[idx..] + } + } + } } -/// Approximates the number of tokens when lexing `contents`. -fn approximate_tokens_lower_bound(contents: &str) -> usize { - contents.len().saturating_mul(15) / 100 +impl<'a> IntoIterator for &'a Tokens { + type Item = &'a Token; + type IntoIter = std::slice::Iter<'a, Token>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } } -/// Parse a full Python program from its tokens. -pub fn parse_program_tokens( - tokens: Tokens, - source: &str, - is_jupyter_notebook: bool, -) -> anyhow::Result { - let mode = if is_jupyter_notebook { - Mode::Ipython - } else { - Mode::Module - }; - match parse_tokens(tokens.into_inner(), source, mode)? 
{ - Mod::Module(m) => Ok(m.body), - Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"), +impl Deref for Tokens { + type Target = [Token]; + + fn deref(&self) -> &Self::Target { + &self.raw } } @@ -529,3 +581,174 @@ impl std::fmt::Display for ModeParseError { write!(f, r#"mode must be "exec", "eval", "ipython", or "single""#) } } + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use crate::lexer::TokenFlags; + + use super::*; + + /// Test case containing a "gap" between two tokens. + /// + /// Code: + const TEST_CASE_WITH_GAP: [(TokenKind, Range); 10] = [ + (TokenKind::Def, 0..3), + (TokenKind::Name, 4..7), + (TokenKind::Lpar, 7..8), + (TokenKind::Rpar, 8..9), + (TokenKind::Colon, 9..10), + (TokenKind::Newline, 10..11), + // Gap ||..|| + (TokenKind::Comment, 15..24), + (TokenKind::NonLogicalNewline, 24..25), + (TokenKind::Indent, 25..29), + (TokenKind::Pass, 29..33), + // No newline at the end to keep the token set full of unique tokens + ]; + + /// Test case containing [`TokenKind::Unknown`] token. + /// + /// Code: + const TEST_CASE_WITH_UNKNOWN: [(TokenKind, Range); 5] = [ + (TokenKind::Name, 0..1), + (TokenKind::Equal, 2..3), + (TokenKind::Unknown, 4..11), + (TokenKind::Plus, 11..12), + (TokenKind::Int, 13..14), + // No newline at the end to keep the token set full of unique tokens + ]; + + /// Helper function to create [`Tokens`] from an iterator of (kind, range). + fn new_tokens(tokens: impl Iterator)>) -> Tokens { + Tokens::new( + tokens + .map(|(kind, range)| { + Token::new( + kind, + TextRange::new(TextSize::new(range.start), TextSize::new(range.end)), + TokenFlags::empty(), + ) + }) + .collect(), + ) + } + + #[test] + fn tokens_up_to_first_unknown_empty() { + let tokens = Tokens::new(vec![]); + assert_eq!(tokens.up_to_first_unknown(), &[]); + } + + #[test] + fn tokens_up_to_first_unknown_noop() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let up_to_first_unknown = tokens.up_to_first_unknown(); + assert_eq!(up_to_first_unknown.len(), tokens.len()); + } + + #[test] + fn tokens_up_to_first_unknown() { + let tokens = new_tokens(TEST_CASE_WITH_UNKNOWN.into_iter()); + let up_to_first_unknown = tokens.up_to_first_unknown(); + assert_eq!(up_to_first_unknown.len(), 2); + } + + #[test] + fn tokens_after_offset_at_token_start() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let after = tokens.after(TextSize::new(8)); + assert_eq!(after.len(), 7); + assert_eq!(after.first().unwrap().kind(), TokenKind::Rpar); + } + + #[test] + fn tokens_after_offset_at_token_end() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let after = tokens.after(TextSize::new(11)); + assert_eq!(after.len(), 4); + assert_eq!(after.first().unwrap().kind(), TokenKind::Comment); + } + + #[test] + fn tokens_after_offset_between_tokens() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let after = tokens.after(TextSize::new(13)); + assert_eq!(after.len(), 4); + assert_eq!(after.first().unwrap().kind(), TokenKind::Comment); + } + + #[test] + fn tokens_after_offset_at_last_token_end() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let after = tokens.after(TextSize::new(33)); + assert_eq!(after.len(), 0); + } + + #[test] + #[should_panic(expected = "Offset 5 is inside a token range 4..7")] + fn tokens_after_offset_inside_token() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + tokens.after(TextSize::new(5)); + } + + #[test] + fn tokens_in_range_at_token_offset() { + let tokens = 
new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(4.into(), 10.into())); + assert_eq!(in_range.len(), 4); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Name); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Colon); + } + + #[test] + fn tokens_in_range_start_offset_at_token_end() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(11.into(), 29.into())); + assert_eq!(in_range.len(), 3); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent); + } + + #[test] + fn tokens_in_range_end_offset_at_token_start() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(8.into(), 15.into())); + assert_eq!(in_range.len(), 3); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Rpar); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline); + } + + #[test] + fn tokens_in_range_start_offset_between_tokens() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(13.into(), 29.into())); + assert_eq!(in_range.len(), 3); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent); + } + + #[test] + fn tokens_in_range_end_offset_between_tokens() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(9.into(), 13.into())); + assert_eq!(in_range.len(), 2); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Colon); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline); + } + + #[test] + #[should_panic(expected = "Offset 5 is inside a token range 4..7")] + fn tokens_in_range_start_offset_inside_token() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + tokens.in_range(TextRange::new(5.into(), 10.into())); + } + + #[test] + #[should_panic(expected = "End offset 6 is inside a token range 4..7")] + fn tokens_in_range_end_offset_inside_token() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + tokens.in_range(TextRange::new(0.into(), 6.into())); + } +} diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index fbb836c7597ca..8504504c8a19a 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -11,11 +11,12 @@ use ruff_python_ast::{ }; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; +use crate::lexer::TokenValue; use crate::parser::progress::ParserProgress; use crate::parser::{helpers, FunctionKind, Parser}; use crate::string::{parse_fstring_literal_element, parse_string_literal, StringType}; use crate::token_set::TokenSet; -use crate::{FStringErrorType, Mode, ParseErrorType, Tok, TokenKind}; +use crate::{FStringErrorType, Mode, ParseErrorType, TokenKind}; use super::{Parenthesized, RecoveryContextKind}; @@ -106,9 +107,24 @@ pub(super) const END_EXPR_SET: TokenSet = TokenSet::new([ const END_SEQUENCE_SET: TokenSet = END_EXPR_SET.remove(TokenKind::Comma); impl<'src> Parser<'src> { + /// Returns `true` if the parser is at a name or keyword (including soft keyword) token. + pub(super) fn at_name_or_keyword(&self) -> bool { + self.at(TokenKind::Name) || self.current_token_kind().is_keyword() + } + + /// Returns `true` if the parser is at a name or soft keyword token. 
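// A hypothetical call site sketching why the predicates above exist:
// soft keywords like `match` and `type` are valid identifiers outside
// their special positions, so identifier-position checks must accept
// them in addition to plain `Name` tokens.
fn can_start_import_name(p: &Parser) -> bool {
    // `import match` is valid Python; `match` only acts as a keyword at
    // the start of a `match` statement, so accept it as a name here.
    p.at_name_or_soft_keyword()
}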
+ pub(super) fn at_name_or_soft_keyword(&self) -> bool { + self.at(TokenKind::Name) || self.at_soft_keyword() + } + + /// Returns `true` if the parser is at a soft keyword token. + pub(super) fn at_soft_keyword(&self) -> bool { + self.current_token_kind().is_soft_keyword() + } + /// Returns `true` if the current token is the start of an expression. pub(super) fn at_expr(&self) -> bool { - self.at_ts(EXPR_SET) + self.at_ts(EXPR_SET) || self.at_soft_keyword() } /// Returns `true` if the current token ends a sequence. @@ -459,36 +475,43 @@ impl<'src> Parser<'src> { let range = self.current_token_range(); if self.at(TokenKind::Name) { - let (Tok::Name { name }, _) = self.bump(TokenKind::Name) else { + let TokenValue::Name(name) = self.bump_value(TokenKind::Name) else { unreachable!(); }; - ast::Identifier { + return ast::Identifier { id: name.to_string(), range, - } + }; + } + + if self.current_token_kind().is_soft_keyword() { + let id = self.src_text(range).to_string(); + self.bump_soft_keyword_as_name(); + return ast::Identifier { id, range }; + } + + if self.current_token_kind().is_keyword() { + // Non-soft keyword + self.add_error( + ParseErrorType::OtherError(format!( + "Expected an identifier, but found a keyword {} that cannot be used here", + self.current_token_kind() + )), + range, + ); + + let id = self.src_text(range).to_string(); + self.bump_any(); + ast::Identifier { id, range } } else { - if self.current_token_kind().is_keyword() { - let (tok, range) = self.next_token(); - self.add_error( - ParseErrorType::OtherError(format!( - "Expected an identifier, but found a keyword '{tok}' that cannot be used here" - )), - range, - ); + self.add_error( + ParseErrorType::OtherError("Expected an identifier".into()), + range, + ); - ast::Identifier { - id: tok.to_string(), - range, - } - } else { - self.add_error( - ParseErrorType::OtherError("Expected an identifier".into()), - range, - ); - ast::Identifier { - id: String::new(), - range: self.missing_node_range(), - } + ast::Identifier { + id: String::new(), + range: self.missing_node_range(), } } } @@ -501,7 +524,7 @@ impl<'src> Parser<'src> { let lhs = match self.current_token_kind() { TokenKind::Float => { - let (Tok::Float { value }, _) = self.bump(TokenKind::Float) else { + let TokenValue::Float(value) = self.bump_value(TokenKind::Float) else { unreachable!() }; @@ -511,7 +534,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Complex => { - let (Tok::Complex { real, imag }, _) = self.bump(TokenKind::Complex) else { + let TokenValue::Complex { real, imag } = self.bump_value(TokenKind::Complex) else { unreachable!() }; Expr::NumberLiteral(ast::ExprNumberLiteral { @@ -520,7 +543,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Int => { - let (Tok::Int { value }, _) = self.bump(TokenKind::Int) else { + let TokenValue::Int(value) = self.bump_value(TokenKind::Int) else { unreachable!() }; Expr::NumberLiteral(ast::ExprNumberLiteral { @@ -1231,7 +1254,10 @@ impl<'src> Parser<'src> { /// /// See: fn parse_string_or_byte_literal(&mut self) -> StringType { - let (Tok::String { value, flags }, range) = self.bump(TokenKind::String) else { + let range = self.current_token_range(); + let flags = self.tokens.current_flags().as_any_string_flags(); + + let TokenValue::String(value) = self.bump_value(TokenKind::String) else { unreachable!() }; @@ -1277,18 +1303,17 @@ impl<'src> Parser<'src> { /// See: fn parse_fstring(&mut self) -> ast::FString { let start = self.node_start(); + let flags = self.tokens.current_flags().as_any_string_flags(); - let 
(Tok::FStringStart(kind), _) = self.bump(TokenKind::FStringStart) else { - unreachable!() - }; - let elements = self.parse_fstring_elements(); + self.bump(TokenKind::FStringStart); + let elements = self.parse_fstring_elements(flags); self.expect(TokenKind::FStringEnd); ast::FString { elements, range: self.node_range(start), - flags: kind.into(), + flags: ast::FStringFlags::from(flags), } } @@ -1297,16 +1322,18 @@ impl<'src> Parser<'src> { /// # Panics /// /// If the parser isn't positioned at a `{` or `FStringMiddle` token. - fn parse_fstring_elements(&mut self) -> FStringElements { + fn parse_fstring_elements(&mut self, flags: ast::AnyStringFlags) -> FStringElements { let mut elements = vec![]; self.parse_list(RecoveryContextKind::FStringElements, |parser| { let element = match parser.current_token_kind() { TokenKind::Lbrace => { - FStringElement::Expression(parser.parse_fstring_expression_element()) + FStringElement::Expression(parser.parse_fstring_expression_element(flags)) } TokenKind::FStringMiddle => { - let (Tok::FStringMiddle { value, flags, .. }, range) = parser.next_token() + let range = parser.current_token_range(); + let TokenValue::FStringMiddle(value) = + parser.bump_value(TokenKind::FStringMiddle) else { unreachable!() }; @@ -1332,7 +1359,7 @@ impl<'src> Parser<'src> { // `Invalid` tokens are created when there's a lexical error, so // we ignore it here to avoid creating unexpected token errors TokenKind::Unknown => { - parser.next_token(); + parser.bump_any(); return; } tok => { @@ -1356,7 +1383,10 @@ impl<'src> Parser<'src> { /// # Panics /// /// If the parser isn't positioned at a `{` token. - fn parse_fstring_expression_element(&mut self) -> ast::FStringExpressionElement { + fn parse_fstring_expression_element( + &mut self, + flags: ast::AnyStringFlags, + ) -> ast::FStringExpressionElement { let start = self.node_start(); self.bump(TokenKind::Lbrace); @@ -1396,7 +1426,10 @@ impl<'src> Parser<'src> { let conversion = if self.eat(TokenKind::Exclamation) { let conversion_flag_range = self.current_token_range(); - if let Tok::Name { name } = self.next_token().0 { + if self.at(TokenKind::Name) { + let TokenValue::Name(name) = self.bump_value(TokenKind::Name) else { + unreachable!(); + }; match &*name { "s" => ConversionFlag::Str, "r" => ConversionFlag::Repr, @@ -1419,6 +1452,8 @@ impl<'src> Parser<'src> { ParseErrorType::FStringError(FStringErrorType::InvalidConversionFlag), conversion_flag_range, ); + // TODO(dhruvmanila): Avoid dropping this token + self.bump_any(); ConversionFlag::None } } else { @@ -1427,7 +1462,7 @@ impl<'src> Parser<'src> { let format_spec = if self.eat(TokenKind::Colon) { let spec_start = self.node_start(); - let elements = self.parse_fstring_elements(); + let elements = self.parse_fstring_elements(flags); Some(Box::new(ast::FStringFormatSpec { range: self.node_range(spec_start), elements, @@ -2229,7 +2264,8 @@ impl<'src> Parser<'src> { fn parse_ipython_escape_command_expression(&mut self) -> ast::ExprIpyEscapeCommand { let start = self.node_start(); - let (Tok::IpyEscapeCommand { value, kind }, _) = self.bump(TokenKind::IpyEscapeCommand) + let TokenValue::IpyEscapeCommand { value, kind } = + self.bump_value(TokenKind::IpyEscapeCommand) else { unreachable!() }; diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index 2545b1dc86211..f1b240cfd6d7b 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -2,20 +2,16 @@ use std::cmp::Ordering; use 
bitflags::bitflags; -use ast::Mod; -use ruff_python_ast as ast; +use ruff_python_ast::{Mod, ModExpression, ModModule}; use ruff_text_size::{Ranged, TextRange, TextSize}; -use crate::lexer::lex; +use crate::lexer::TokenValue; +use crate::parser::expression::ExpressionContext; use crate::parser::progress::{ParserProgress, TokenId}; -use crate::{ - lexer::{LexResult, Spanned}, - token_set::TokenSet, - token_source::TokenSource, - Mode, ParseError, ParseErrorType, Tok, TokenKind, -}; - -use self::expression::ExpressionContext; +use crate::token_set::TokenSet; +use crate::token_source::{TokenSource, TokenSourceCheckpoint}; +use crate::{Mode, ParseError, ParseErrorType, TokenKind}; +use crate::{Parsed, Tokens}; mod expression; mod helpers; @@ -26,57 +22,12 @@ mod statement; #[cfg(test)] mod tests; -/// Represents the parsed source code. -/// -/// This includes the AST and all of the errors encountered during parsing. -#[derive(Debug)] -pub struct Program { - ast: ast::Mod, - parse_errors: Vec, -} - -impl Program { - /// Returns the parsed AST. - pub fn ast(&self) -> &ast::Mod { - &self.ast - } - - /// Returns a list of syntax errors found during parsing. - pub fn errors(&self) -> &[ParseError] { - &self.parse_errors - } - - /// Consumes the [`Program`] and returns the parsed AST. - pub fn into_ast(self) -> ast::Mod { - self.ast - } - - /// Consumes the [`Program`] and returns a list of syntax errors found during parsing. - pub fn into_errors(self) -> Vec { - self.parse_errors - } - - /// Returns `true` if the program is valid i.e., it has no syntax errors. - pub fn is_valid(&self) -> bool { - self.parse_errors.is_empty() - } - - /// Parse the given Python source code using the specified [`Mode`]. - pub fn parse_str(source: &str, mode: Mode) -> Program { - let tokens = lex(source, mode); - Self::parse_tokens(source, tokens.collect(), mode) - } - - /// Parse a vector of [`LexResult`]s using the specified [`Mode`]. - pub fn parse_tokens(source: &str, tokens: Vec, mode: Mode) -> Program { - Parser::new(source, mode, TokenSource::new(tokens)).parse_program() - } -} - #[derive(Debug)] pub(crate) struct Parser<'src> { source: &'src str, - tokens: TokenSource, + + /// Token source for the parser that skips over any non-trivia token. + tokens: TokenSource<'src>, /// Stores all the syntax errors found during the parsing. errors: Vec, @@ -84,37 +35,29 @@ pub(crate) struct Parser<'src> { /// Specify the mode in which the code will be parsed. mode: Mode, - /// Current token along with its range. - current: Spanned, - /// The ID of the current token. This is used to track the progress of the parser /// to avoid infinite loops when the parser is stuck. current_token_id: TokenId, - /// The end of the last processed. Used to determine a node's end. - last_token_end: TextSize, - - /// The range of the tokens to parse. - /// - /// The range is equal to `[0; source.len())` when parsing an entire file. The range can be - /// different when parsing only a part of a file using the [`crate::lex_starts_at`] and - /// [`crate::parse_expression_starts_at`] APIs in which case the the range is equal to - /// `[offset; subrange.len())`. - tokens_range: TextRange, + /// The end of the previous token processed. This is used to determine a node's end. + prev_token_end: TextSize, + /// The recovery context in which the parser is currently in. recovery_context: RecoveryContext, + + /// The start offset in the source code from which to start parsing at. 
+ start_offset: TextSize, } impl<'src> Parser<'src> { - pub(crate) fn new(source: &'src str, mode: Mode, mut tokens: TokenSource) -> Parser<'src> { - let tokens_range = TextRange::new( - tokens.position().unwrap_or_default(), - tokens.end().unwrap_or_default(), - ); + /// Create a new parser for the given source code. + pub(crate) fn new(source: &'src str, mode: Mode) -> Self { + Parser::new_starts_at(source, mode, TextSize::new(0)) + } - let current = tokens - .next() - .unwrap_or_else(|| (Tok::EndOfFile, TextRange::empty(tokens_range.end()))); + /// Create a new parser for the given source code which starts parsing at the given offset. + pub(crate) fn new_starts_at(source: &'src str, mode: Mode, start_offset: TextSize) -> Self { + let tokens = TokenSource::from_source(source, mode, start_offset); Parser { mode, @@ -122,24 +65,20 @@ impl<'src> Parser<'src> { errors: Vec::new(), tokens, recovery_context: RecoveryContext::empty(), - last_token_end: tokens_range.start(), - current, + prev_token_end: TextSize::new(0), + start_offset, current_token_id: TokenId::default(), - tokens_range, } } - /// Consumes the [`Parser`] and returns the parsed [`Program`]. - pub(crate) fn parse_program(mut self) -> Program { - let ast = match self.mode { + /// Consumes the [`Parser`] and returns the parsed [`Parsed`]. + pub(crate) fn parse(mut self) -> Parsed { + let syntax = match self.mode { Mode::Expression => Mod::Expression(self.parse_single_expression()), Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()), }; - Program { - ast, - parse_errors: self.finish(), - } + self.finish(syntax) } /// Parses a single expression. @@ -150,7 +89,7 @@ impl<'src> Parser<'src> { /// /// After parsing a single expression, an error is reported and all remaining tokens are /// dropped by the parser. - fn parse_single_expression(&mut self) -> ast::ModExpression { + fn parse_single_expression(&mut self) -> ModExpression { let start = self.node_start(); let parsed_expr = self.parse_expression_list(ExpressionContext::default()); @@ -170,13 +109,13 @@ impl<'src> Parser<'src> { if self.at(TokenKind::EndOfFile) { break; } - self.next_token(); + self.bump_any(); } } self.bump(TokenKind::EndOfFile); - ast::ModExpression { + ModExpression { body: Box::new(parsed_expr.expr), range: self.node_range(start), } @@ -185,7 +124,7 @@ impl<'src> Parser<'src> { /// Parses a Python module. /// /// This is to be used for [`Mode::Module`] and [`Mode::Ipython`]. - fn parse_module(&mut self) -> ast::ModModule { + fn parse_module(&mut self) -> ModModule { let body = self.parse_list_into_vec( RecoveryContextKind::ModuleStatements, Parser::parse_statement, @@ -193,13 +132,13 @@ impl<'src> Parser<'src> { self.bump(TokenKind::EndOfFile); - ast::ModModule { + ModModule { body, - range: self.tokens_range, + range: TextRange::new(self.start_offset, self.current_token_range().end()), } } - fn finish(self) -> Vec { + fn finish(self, syntax: Mod) -> Parsed { assert_eq!( self.current_token_kind(), TokenKind::EndOfFile, @@ -208,13 +147,18 @@ impl<'src> Parser<'src> { // TODO consider re-integrating lexical error handling into the parser? let parse_errors = self.errors; - let lex_errors = self.tokens.finish(); + let (tokens, comment_ranges, lex_errors) = self.tokens.finish(); // Fast path for when there are no lex errors. // There's no fast path for when there are no parse errors because a lex error // always results in a parse error. 
if lex_errors.is_empty() {
-            return parse_errors;
+            return Parsed {
+                syntax,
+                tokens: Tokens::new(tokens),
+                comment_ranges,
+                errors: parse_errors,
+            };
         }
 
         let mut merged = Vec::with_capacity(parse_errors.len().saturating_add(lex_errors.len()));
@@ -241,7 +185,12 @@ impl<'src> Parser<'src> {
         merged.extend(parse_errors);
         merged.extend(lex_errors.map(ParseError::from));
 
-        merged
+        Parsed {
+            syntax,
+            tokens: Tokens::new(tokens),
+            comment_ranges,
+            errors: merged,
+        }
     }
 
     /// Returns the start position for a node that starts at the current token.
@@ -280,7 +229,7 @@ impl<'src> Parser<'src> {
         //
         // In either of the above cases, there's a "gap" between the end of the last token and start
        // of the current token.
-        if self.last_token_end <= start {
+        if self.prev_token_end <= start {
            // We need to create an empty range at the last token end instead of the start because
            // otherwise this node range will fall outside the range of its parent node. Taking
            // the above example:
@@ -302,9 +251,9 @@ impl<'src> Parser<'src> {
            //   def foo # comment
            //   def bar(): ...
            //   def baz
-            TextRange::empty(self.last_token_end)
+            TextRange::empty(self.prev_token_end)
         } else {
-            TextRange::new(start, self.last_token_end)
+            TextRange::new(start, self.prev_token_end)
         }
     }
 
@@ -319,65 +268,48 @@ impl<'src> Parser<'src> {
         //     # ^^^^ expression range
         //     # ^ last token end
         //     ```
-        TextRange::empty(self.last_token_end)
+        TextRange::empty(self.prev_token_end)
     }
 
     /// Moves the parser to the next token.
-    ///
-    /// Returns the old current token as an owned value.
-    fn next_token(&mut self) -> Spanned {
-        let next = self
-            .tokens
-            .next()
-            .unwrap_or_else(|| (Tok::EndOfFile, TextRange::empty(self.tokens_range.end())));
-
-        self.current_token_id.increment();
-
-        let current = std::mem::replace(&mut self.current, next);
-
+    fn do_bump(&mut self, kind: TokenKind) {
         if !matches!(
-            current.0,
+            self.current_token_kind(),
             // TODO explore including everything up to the dedent as part of the body.
-            Tok::Dedent
+            TokenKind::Dedent
            // Don't include newlines in the body
-            | Tok::Newline
+            | TokenKind::Newline
            // TODO(micha): Including the semi feels more correct but it isn't compatible with lalrpop and breaks the
            // formatters semicolon detection. Exclude it for now
-            | Tok::Semi
+            | TokenKind::Semi
        ) {
-            self.last_token_end = current.1.end();
+            self.prev_token_end = self.current_token_range().end();
        }
 
-        current
+        self.tokens.bump(kind);
+        self.current_token_id.increment();
    }
 
    /// Returns the next token kind without consuming it.
-    fn peek(&self) -> TokenKind {
-        self.tokens
-            .peek()
-            .map_or(TokenKind::EndOfFile, |spanned| spanned.0)
+    fn peek(&mut self) -> TokenKind {
+        self.tokens.peek()
    }
 
-    /// Returns the current token kind along with its range.
-    ///
-    /// Use [`Parser::current_token_kind`] or [`Parser::current_token_range`] to only get the kind
-    /// or range respectively.
-    #[inline]
-    fn current_token(&self) -> (TokenKind, TextRange) {
-        (self.current_token_kind(), self.current_token_range())
+    /// Returns the next two token kinds without consuming them.
+    fn peek2(&mut self) -> (TokenKind, TokenKind) {
+        self.tokens.peek2()
    }
 
    /// Returns the current token kind.
    #[inline]
    fn current_token_kind(&self) -> TokenKind {
-        // TODO: Converting the token kind over and over again can be expensive.
-        TokenKind::from_token(&self.current.0)
+        self.tokens.current_kind()
    }
 
    /// Returns the range of the current token.
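// A hedged sketch of the token-consumption helpers in this impl: `bump`
// when the kind was already checked, `eat` for optional tokens, and
// `expect` for required ones (it records an error instead of panicking).
// The function below is hypothetical, not part of the parser.
fn parse_parenthesized_name(p: &mut Parser) {
    p.bump(TokenKind::Lpar); // the caller already verified we're at `(`
    if !p.eat(TokenKind::Name) {
        // the optional name was absent; recovery is left to the caller
    }
    p.expect(TokenKind::Rpar); // records `ExpectedToken` if `)` is missing
}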
#[inline] fn current_token_range(&self) -> TextRange { - self.current.1 + self.tokens.current_range() } /// Returns the current token ID. @@ -386,50 +318,88 @@ impl<'src> Parser<'src> { self.current_token_id } - /// Eat the current token if it is of the given kind, returning `true` in - /// that case. Otherwise, return `false`. - fn eat(&mut self, kind: TokenKind) -> bool { - if self.at(kind) { - self.next_token(); - true - } else { - false - } - } - /// Bumps the current token assuming it is of the given kind. /// - /// Returns the current token as an owned value. - /// /// # Panics /// /// If the current token is not of the given kind. - fn bump(&mut self, kind: TokenKind) -> (Tok, TextRange) { + fn bump(&mut self, kind: TokenKind) { assert_eq!(self.current_token_kind(), kind); - self.next_token() + self.do_bump(kind); } - /// Bumps the current token assuming it is found in the given token set. + /// Take the token value from the underlying token source and bump the current token. + /// + /// # Panics /// - /// Returns the current token as an owned value. + /// If the current token is not of the given kind. + fn bump_value(&mut self, kind: TokenKind) -> TokenValue { + let value = self.tokens.take_value(); + self.bump(kind); + value + } + + /// Bumps the current token assuming it is found in the given token set. /// /// # Panics /// /// If the current token is not found in the given token set. - fn bump_ts(&mut self, ts: TokenSet) -> (Tok, TextRange) { - assert!(ts.contains(self.current_token_kind())); + fn bump_ts(&mut self, ts: TokenSet) { + let kind = self.current_token_kind(); + assert!(ts.contains(kind)); - self.next_token() + self.do_bump(kind); } + /// Bumps the current token regardless of its kind and advances to the next token. + /// + /// # Panics + /// + /// If the parser is at end of file. + fn bump_any(&mut self) { + let kind = self.current_token_kind(); + assert_ne!(kind, TokenKind::EndOfFile); + + self.do_bump(kind); + } + + /// Bumps the soft keyword token as a `Name` token. + /// + /// # Panics + /// + /// If the current token is not a soft keyword. + pub(crate) fn bump_soft_keyword_as_name(&mut self) { + assert!(self.at_soft_keyword()); + + self.do_bump(TokenKind::Name); + } + + /// Consume the current token if it is of the given kind. Returns `true` if it matches, `false` + /// otherwise. + fn eat(&mut self, kind: TokenKind) -> bool { + if self.at(kind) { + self.do_bump(kind); + true + } else { + false + } + } + + /// Eat the current token if its of the expected kind, otherwise adds an appropriate error. fn expect(&mut self, expected: TokenKind) -> bool { if self.eat(expected) { return true; } - let (found, range) = self.current_token(); - self.add_error(ParseErrorType::ExpectedToken { found, expected }, range); + self.add_error( + ParseErrorType::ExpectedToken { + found: self.current_token_kind(), + expected, + }, + self.current_token_range(), + ); + false } @@ -468,11 +438,7 @@ impl<'src> Parser<'src> { where T: Ranged, { - let range = ranged.range(); - // `ranged` uses absolute ranges to the source text of an entire file. Fix the source by - // subtracting the start offset when parsing only a part of a file (when parsing the tokens - // from `lex_starts_at`). 
- &self.source[range - self.tokens_range.start()] + &self.source[ranged.range()] } /// Parses a list of elements into a vector where each element is parsed using @@ -531,7 +497,7 @@ impl<'src> Parser<'src> { break; } - self.next_token(); + self.bump_any(); } } @@ -615,7 +581,7 @@ impl<'src> Parser<'src> { trailing_comma_range = None; } - self.next_token(); + self.bump_any(); } } @@ -641,6 +607,42 @@ impl<'src> Parser<'src> { false } + + /// Creates a checkpoint to which the parser can later return using [`Self::rewind`]. + fn checkpoint(&self) -> ParserCheckpoint<'src> { + ParserCheckpoint { + tokens: self.tokens.checkpoint(), + errors_position: self.errors.len(), + current_token_id: self.current_token_id, + prev_token_end: self.prev_token_end, + recovery_context: self.recovery_context, + } + } + + /// Restore the parser to the given checkpoint. + fn rewind(&mut self, checkpoint: ParserCheckpoint<'src>) { + let ParserCheckpoint { + tokens, + errors_position, + current_token_id, + prev_token_end, + recovery_context, + } = checkpoint; + + self.tokens.rewind(tokens); + self.errors.truncate(errors_position); + self.current_token_id = current_token_id; + self.prev_token_end = prev_token_end; + self.recovery_context = recovery_context; + } +} + +struct ParserCheckpoint<'src> { + tokens: TokenSourceCheckpoint<'src>, + errors_position: usize, + current_token_id: TokenId, + prev_token_end: TextSize, + recovery_context: RecoveryContext, } #[derive(Copy, Clone, Debug, Eq, PartialEq)] @@ -872,7 +874,7 @@ impl RecoveryContextKind { fn is_list_terminator(self, p: &Parser) -> bool { match self { - // The program must consume all tokens until the end + // The parser must consume all tokens until the end RecoveryContextKind::ModuleStatements => false, RecoveryContextKind::BlockStatements => p.at(TokenKind::Dedent), @@ -1008,9 +1010,9 @@ impl RecoveryContextKind { RecoveryContextKind::Except => p.at(TokenKind::Except), RecoveryContextKind::AssignmentTargets => p.at(TokenKind::Equal), RecoveryContextKind::TypeParams => p.at_type_param(), - RecoveryContextKind::ImportNames => p.at(TokenKind::Name), + RecoveryContextKind::ImportNames => p.at_name_or_soft_keyword(), RecoveryContextKind::ImportFromAsNames(_) => { - matches!(p.current_token_kind(), TokenKind::Star | TokenKind::Name) + p.at(TokenKind::Star) || p.at_name_or_soft_keyword() } RecoveryContextKind::Slices => p.at(TokenKind::Colon) || p.at_expr(), RecoveryContextKind::ListElements @@ -1029,11 +1031,13 @@ impl RecoveryContextKind { RecoveryContextKind::MatchPatternClassArguments => p.at_pattern_start(), RecoveryContextKind::Arguments => p.at_expr(), RecoveryContextKind::DeleteTargets => p.at_expr(), - RecoveryContextKind::Identifiers => p.at(TokenKind::Name), - RecoveryContextKind::Parameters(_) => matches!( - p.current_token_kind(), - TokenKind::Name | TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash - ), + RecoveryContextKind::Identifiers => p.at_name_or_soft_keyword(), + RecoveryContextKind::Parameters(_) => { + matches!( + p.current_token_kind(), + TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash + ) || p.at_name_or_soft_keyword() + } RecoveryContextKind::WithItems(_) => p.at_expr(), RecoveryContextKind::FStringElements => matches!( p.current_token_kind(), diff --git a/crates/ruff_python_parser/src/parser/pattern.rs b/crates/ruff_python_parser/src/parser/pattern.rs index 4d200b4e42de1..c0fc818ca0931 100644 --- a/crates/ruff_python_parser/src/parser/pattern.rs +++ b/crates/ruff_python_parser/src/parser/pattern.rs @@ -1,10 +1,11
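The `checkpoint`/`rewind` pair introduced here is what later enables speculative parsing of soft keywords. A minimal sketch of the contract, assuming a much-reduced parser state (the real checkpoint also snapshots the token source, token ID, and recovery context):

```rust
struct Parser {
    pos: usize,
    errors: Vec<String>,
}

struct Checkpoint {
    pos: usize,
    errors_position: usize,
}

impl Parser {
    fn checkpoint(&self) -> Checkpoint {
        Checkpoint {
            pos: self.pos,
            errors_position: self.errors.len(),
        }
    }

    fn rewind(&mut self, checkpoint: Checkpoint) {
        self.pos = checkpoint.pos;
        // Crucially, errors reported during the speculative attempt are
        // discarded, so a failed attempt leaves no trace.
        self.errors.truncate(checkpoint.errors_position);
    }
}

fn main() {
    let mut p = Parser { pos: 0, errors: Vec::new() };
    let cp = p.checkpoint();

    // Speculatively parse: consume tokens and record an error...
    p.pos = 3;
    p.errors.push("speculative error".to_string());

    // ...then decide the speculation failed and roll everything back.
    p.rewind(cp);
    assert_eq!(p.pos, 0);
    assert!(p.errors.is_empty());
}
```

The key property is that error reporting becomes transactional: a failed speculative parse truncates its errors away, so the retry path starts clean.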
@@ use ruff_python_ast::{self as ast, Expr, ExprContext, Number, Operator, Pattern, Singleton}; use ruff_text_size::{Ranged, TextSize}; +use crate::lexer::TokenValue; use crate::parser::progress::ParserProgress; use crate::parser::{recovery, Parser, RecoveryContextKind, SequenceMatchPatternParentheses}; use crate::token_set::TokenSet; -use crate::{ParseErrorType, Tok, TokenKind}; +use crate::{ParseErrorType, TokenKind}; use super::expression::ExpressionContext; @@ -50,12 +51,12 @@ const MAPPING_PATTERN_START_SET: TokenSet = TokenSet::new([ impl<'src> Parser<'src> { /// Returns `true` if the current token is a valid start of a pattern. pub(super) fn at_pattern_start(&self) -> bool { - self.at_ts(PATTERN_START_SET) + self.at_ts(PATTERN_START_SET) || self.at_soft_keyword() } /// Returns `true` if the current token is a valid start of a mapping pattern. pub(super) fn at_mapping_pattern_start(&self) -> bool { - self.at_ts(MAPPING_PATTERN_START_SET) + self.at_ts(MAPPING_PATTERN_START_SET) || self.at_soft_keyword() } /// Entry point to start parsing a pattern. @@ -397,7 +398,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Complex => { - let (Tok::Complex { real, imag }, _) = self.bump(TokenKind::Complex) else { + let TokenValue::Complex { real, imag } = self.bump_value(TokenKind::Complex) else { unreachable!() }; let range = self.node_range(start); @@ -411,7 +412,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Int => { - let (Tok::Int { value }, _) = self.bump(TokenKind::Int) else { + let TokenValue::Int(value) = self.bump_value(TokenKind::Int) else { unreachable!() }; let range = self.node_range(start); @@ -425,7 +426,7 @@ impl<'src> Parser<'src> { }) } TokenKind::Float => { - let (Tok::Float { value }, _) = self.bump(TokenKind::Float) else { + let TokenValue::Float(value) = self.bump_value(TokenKind::Float) else { unreachable!() }; let range = self.node_range(start); @@ -438,46 +439,6 @@ impl<'src> Parser<'src> { range, }) } - TokenKind::Name if self.peek() == TokenKind::Dot => { - let (Tok::Name { name }, _) = self.bump(TokenKind::Name) else { - unreachable!() - }; - let id = Expr::Name(ast::ExprName { - id: name.to_string(), - ctx: ExprContext::Load, - range: self.node_range(start), - }); - - let attribute = self.parse_attr_expr_for_match_pattern(id, start); - - Pattern::MatchValue(ast::PatternMatchValue { - value: Box::new(attribute), - range: self.node_range(start), - }) - } - TokenKind::Name => { - let (Tok::Name { name }, _) = self.bump(TokenKind::Name) else { - unreachable!() - }; - let range = self.node_range(start); - - // test_ok match_as_pattern - // match foo: - // case foo_bar: ... - // case _: ... - Pattern::MatchAs(ast::PatternMatchAs { - range, - pattern: None, - name: if &*name == "_" { - None - } else { - Some(ast::Identifier { - id: name.to_string(), - range, - }) - }, - }) - } kind => { // The `+` is only for better error recovery. if let Some(unary_arithmetic_op) = kind.as_unary_arithmetic_operator() { @@ -506,26 +467,57 @@ impl<'src> Parser<'src> { } } - // Upon encountering an unexpected token, return a `Pattern::MatchValue` containing - // an empty `Expr::Name`. - let invalid_node = if kind.is_keyword() { - Expr::Name(self.parse_name()) + if self.at_name_or_keyword() { + if self.peek() == TokenKind::Dot { + // test_ok match_attr_pattern_soft_keyword + // match foo: + // case match.bar: ... + // case case.bar: ... + // case type.bar: ... + // case match.case.type.bar.type.case.match: ... 
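The `bump_value` calls above rely on the new split between token kinds (plain `Copy` enums the parser matches on) and token values (owned payloads taken out of the token source only when a node actually needs them). A hedged sketch of that shape, with hypothetical stand-in types rather than ruff's actual lexer:

```rust
#[derive(Clone, Copy)]
enum TokenKind {
    Int,
    Float,
}

#[derive(Debug)]
enum TokenValue {
    Int(i64),
    Float(f64),
}

// Stand-in for taking the owned value out of the token source after the
// current kind has been asserted.
fn bump_value(kind: TokenKind) -> TokenValue {
    match kind {
        TokenKind::Int => TokenValue::Int(42),
        TokenKind::Float => TokenValue::Float(1.5),
    }
}

fn main() {
    // The `let .. else { unreachable!() }` shape mirrors the parser: the
    // kind was already checked, so any other payload is a logic error.
    let TokenValue::Int(value) = bump_value(TokenKind::Int) else {
        unreachable!()
    };
    assert_eq!(value, 42);
}
```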
+ let id = Expr::Name(self.parse_name()); + + let attribute = self.parse_attr_expr_for_match_pattern(id, start); + + Pattern::MatchValue(ast::PatternMatchValue { + value: Box::new(attribute), + range: self.node_range(start), + }) + } else { + // test_ok match_as_pattern_soft_keyword + // match foo: + // case case: ... + // case match: ... + // case type: ... + let ident = self.parse_identifier(); + + // test_ok match_as_pattern + // match foo: + // case foo_bar: ... + // case _: ... + Pattern::MatchAs(ast::PatternMatchAs { + range: ident.range, + pattern: None, + name: if &ident == "_" { None } else { Some(ident) }, + }) + } } else { + // Upon encountering an unexpected token, return a `Pattern::MatchValue` containing + // an empty `Expr::Name`. self.add_error( ParseErrorType::OtherError("Expected a pattern".to_string()), self.current_token_range(), ); - Expr::Name(ast::ExprName { + let invalid_node = Expr::Name(ast::ExprName { range: self.missing_node_range(), id: String::new(), ctx: ExprContext::Invalid, + }); + Pattern::MatchValue(ast::PatternMatchValue { + range: invalid_node.range(), + value: Box::new(invalid_node), }) - }; - - Pattern::MatchValue(ast::PatternMatchValue { - range: invalid_node.range(), - value: Box::new(invalid_node), - }) + } } } } diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs index 69d7ec8a57abd..3e9a047db10c0 100644 --- a/crates/ruff_python_parser/src/parser/statement.rs +++ b/crates/ruff_python_parser/src/parser/statement.rs @@ -8,13 +8,14 @@ use ruff_python_ast::{ }; use ruff_text_size::{Ranged, TextSize}; +use crate::lexer::TokenValue; use crate::parser::expression::{GeneratorExpressionInParentheses, ParsedExpr, EXPR_SET}; use crate::parser::progress::ParserProgress; use crate::parser::{ helpers, FunctionKind, Parser, RecoveryContext, RecoveryContextKind, WithItemKind, }; use crate::token_set::TokenSet; -use crate::{Mode, ParseErrorType, Tok, TokenKind}; +use crate::{Mode, ParseErrorType, TokenKind}; use super::expression::{ExpressionContext, OperatorPrecedence}; use super::Parenthesized; @@ -84,13 +85,13 @@ impl<'src> Parser<'src> { /// Returns `true` if the current token is the start of a simple statement, /// including expressions. fn at_simple_stmt(&self) -> bool { - self.at_ts(SIMPLE_STMT_WITH_EXPR_SET) + self.at_ts(SIMPLE_STMT_WITH_EXPR_SET) || self.at_soft_keyword() } /// Returns `true` if the current token is the start of a simple, compound or expression /// statement. pub(super) fn at_stmt(&self) -> bool { - self.at_ts(STMTS_SET) + self.at_ts(STMTS_SET) || self.at_soft_keyword() } /// Checks if the parser is currently positioned at the start of a type parameter. @@ -120,8 +121,26 @@ impl<'src> Parser<'src> { TokenKind::With => Stmt::With(self.parse_with_statement(start)), TokenKind::At => self.parse_decorators(), TokenKind::Async => self.parse_async_statement(), - TokenKind::Match => Stmt::Match(self.parse_match_statement()), - _ => self.parse_single_simple_statement(), + token => { + if token == TokenKind::Match { + // Match is considered a soft keyword, so we will treat it as an identifier if + // it's followed by an unexpected token. 
+ + match self.classify_match_token() { + MatchTokenKind::Keyword => { + return Stmt::Match(self.parse_match_statement()); + } + MatchTokenKind::KeywordOrIdentifier => { + if let Some(match_stmt) = self.try_parse_match_statement() { + return Stmt::Match(match_stmt); + } + } + MatchTokenKind::Identifier => {} + } + } + + self.parse_single_simple_statement() + } } } @@ -252,11 +271,22 @@ impl<'src> Parser<'src> { TokenKind::Assert => Stmt::Assert(self.parse_assert_statement()), TokenKind::Global => Stmt::Global(self.parse_global_statement()), TokenKind::Nonlocal => Stmt::Nonlocal(self.parse_nonlocal_statement()), - TokenKind::Type => Stmt::TypeAlias(self.parse_type_alias_statement()), TokenKind::IpyEscapeCommand => { Stmt::IpyEscapeCommand(self.parse_ipython_escape_command_statement()) } - _ => { + token => { + if token == TokenKind::Type { + // Type is considered a soft keyword, so we will treat it as an identifier if + // it's followed by an unexpected token. + let (first, second) = self.peek2(); + + if (first == TokenKind::Name || first.is_soft_keyword()) + && matches!(second, TokenKind::Lsqb | TokenKind::Equal) + { + return Stmt::TypeAlias(self.parse_type_alias_statement()); + } + } + let start = self.node_start(); // simple_stmt: `... | yield_stmt | star_expressions | ...` @@ -498,7 +528,12 @@ impl<'src> Parser<'src> { } } - let module = if self.at(TokenKind::Name) { + let module = if self.at_name_or_soft_keyword() { + // test_ok from_import_soft_keyword_module_name + // from match import pattern + // from type import bar + // from case import pattern + // from match.type.case import foo Some(self.parse_dotted_name()) } else { if leading_dots == 0 { @@ -603,7 +638,11 @@ impl<'src> Parser<'src> { }; let asname = if self.eat(TokenKind::As) { - if self.at(TokenKind::Name) { + if self.at_name_or_soft_keyword() { + // test_ok import_as_name_soft_keyword + // import foo as match + // import bar as case + // import baz as type Some(self.parse_identifier()) } else { // test_err import_alias_missing_asname @@ -872,7 +911,8 @@ impl<'src> Parser<'src> { fn parse_ipython_escape_command_statement(&mut self) -> ast::StmtIpyEscapeCommand { let start = self.node_start(); - let (Tok::IpyEscapeCommand { value, kind }, _) = self.bump(TokenKind::IpyEscapeCommand) + let TokenValue::IpyEscapeCommand { value, kind } = + self.bump_value(TokenKind::IpyEscapeCommand) else { unreachable!() }; @@ -1469,7 +1509,12 @@ impl<'src> Parser<'src> { }; let name = if self.eat(TokenKind::As) { - if self.at(TokenKind::Name) { + if self.at_name_or_soft_keyword() { + // test_ok except_stmt_as_name_soft_keyword + // try: ... + // except Exception as match: ... + // except Exception as case: ... + // except Exception as type: ... Some(self.parse_identifier()) } else { // test_err except_stmt_missing_as_name @@ -2327,6 +2372,84 @@ impl<'src> Parser<'src> { target } + /// Try parsing a `match` statement. + /// + /// This uses speculative parsing to remove the ambiguity of whether the `match` token is used + /// as a keyword or an identifier. This ambiguity arises only if the `match` token is + /// followed by certain tokens.
For example, if `match` is followed by `[`, we can't know if + /// it's used in the context of a subscript expression or as a list expression: + /// + /// ```python + /// # Subscript expression; `match` is an identifier + /// match[x] + /// + /// # List expression; `match` is a keyword + /// match [x, y]: + /// case [1, 2]: + /// pass + /// ``` + /// + /// This is done by parsing the subject expression considering `match` as a keyword token. + /// Then, based on certain heuristics, we'll determine if our assumption is true. If so, we'll + /// continue parsing the entire match statement. Otherwise, return `None`. + /// + /// # Panics + /// + /// If the parser isn't positioned at a `match` token. + /// + /// See: + fn try_parse_match_statement(&mut self) -> Option<ast::StmtMatch> { + let checkpoint = self.checkpoint(); + + let start = self.node_start(); + self.bump(TokenKind::Match); + + let subject = self.parse_match_subject_expression(); + + match self.current_token_kind() { + TokenKind::Colon => { + // `match` is a keyword + self.bump(TokenKind::Colon); + + let cases = self.parse_match_body(); + + Some(ast::StmtMatch { + subject: Box::new(subject), + cases, + range: self.node_range(start), + }) + } + TokenKind::Newline if matches!(self.peek2(), (TokenKind::Indent, TokenKind::Case)) => { + // `match` is a keyword + + // test_err match_expected_colon + // match [1, 2] + // case _: ... + self.add_error( + ParseErrorType::ExpectedToken { + found: self.current_token_kind(), + expected: TokenKind::Colon, + }, + self.current_token_range(), + ); + + let cases = self.parse_match_body(); + + Some(ast::StmtMatch { + subject: Box::new(subject), + cases, + range: self.node_range(start), + }) + } + _ => { + // `match` is an identifier + self.rewind(checkpoint); + + None + } + } + } + /// Parses a match statement. /// /// # Panics @@ -2338,7 +2461,21 @@ impl<'src> Parser<'src> { let start = self.node_start(); self.bump(TokenKind::Match); - let subject_start = self.node_start(); + let subject = self.parse_match_subject_expression(); + self.expect(TokenKind::Colon); + + let cases = self.parse_match_body(); + + ast::StmtMatch { + subject: Box::new(subject), + cases, + range: self.node_range(start), + } + } + + /// Parses the subject expression for a `match` statement. + fn parse_match_subject_expression(&mut self) -> Expr { + let start = self.node_start(); // Subject expression grammar is: // @@ -2370,13 +2507,12 @@ impl<'src> Parser<'src> { // case _: ... // match yield x: // case _: ... - let subject = if self.at(TokenKind::Comma) { - let tuple = - self.parse_tuple_expression(subject.expr, subject_start, Parenthesized::No, |p| { - p.parse_named_expression_or_higher(ExpressionContext::starred_bitwise_or()) - }); + if self.at(TokenKind::Comma) { + let tuple = self.parse_tuple_expression(subject.expr, start, Parenthesized::No, |p| { + p.parse_named_expression_or_higher(ExpressionContext::starred_bitwise_or()) + }); - Expr::Tuple(tuple).into() + Expr::Tuple(tuple) } else { if subject.is_unparenthesized_starred_expr() { // test_err match_stmt_single_starred_subject // match *foo: // case _: ... self.add_error(ParseErrorType::InvalidStarredExpressionUsage, &subject); } - subject - }; - - self.expect(TokenKind::Colon); + subject.expr + } + } /// Parses the body of a `match` statement. + /// + /// This method expects that the parser is positioned at a `Newline` token. If not, it adds a + /// syntax error and continues parsing.
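Putting the pieces together, `try_parse_match_statement` amounts to "checkpoint, assume keyword, commit on `:` (or an obvious case block), otherwise rewind". A toy illustration of that control flow, using strings as stand-ins for tokens rather than the real `TokenKind` API:

```rust
struct Parser<'a> {
    tokens: &'a [&'a str],
    pos: usize,
    errors: Vec<String>,
}

impl<'a> Parser<'a> {
    fn try_parse_match(&mut self) -> Option<String> {
        // Checkpoint: position plus the number of errors reported so far.
        let start = self.pos;
        let errors_len = self.errors.len();

        self.pos += 1; // bump `match`

        // "Parse" the subject (grossly simplified to a single token).
        let Some(&subject) = self.tokens.get(self.pos) else {
            self.pos = start;
            return None;
        };
        self.pos += 1;

        if self.tokens.get(self.pos) == Some(&":") {
            // `match` really was a keyword; commit and keep parsing.
            Some(format!("match {subject}: ..."))
        } else {
            // `match` was an identifier; rewind so the caller can re-parse
            // the same tokens as an ordinary expression statement.
            self.pos = start;
            self.errors.truncate(errors_len);
            None
        }
    }
}

fn main() {
    let mut kw = Parser { tokens: &["match", "x", ":"], pos: 0, errors: vec![] };
    assert!(kw.try_parse_match().is_some());

    let mut ident = Parser { tokens: &["match", "[", "x", "]"], pos: 0, errors: vec![] };
    assert!(ident.try_parse_match().is_none());
    assert_eq!(ident.pos, 0); // fully rewound
}
```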
+ fn parse_match_body(&mut self) -> Vec<ast::MatchCase> { // test_err match_stmt_no_newline_before_case // match foo: case _: ... self.expect(TokenKind::Newline); @@ -2411,11 +2551,7 @@ impl<'src> Parser<'src> { // TODO(dhruvmanila): Should we expect `Dedent` only if there was an `Indent` present? self.expect(TokenKind::Dedent); - ast::StmtMatch { - subject: Box::new(subject.expr), - cases, - range: self.node_range(start), - } + cases } /// Parses a list of match case blocks. @@ -2458,7 +2594,6 @@ impl<'src> Parser<'src> { self.bump(TokenKind::Case); // test_err match_stmt_missing_pattern - // # TODO(dhruvmanila): Here, `case` is a name token because of soft keyword transformer // match x: // case : ... let pattern = self.parse_match_patterns(); @@ -2557,8 +2692,6 @@ impl<'src> Parser<'src> { // async while test: ... // async x = 1 // async async def foo(): ... - // # TODO(dhruvmanila): Here, `match` is actually a Name token because - // # of the soft keyword # transformer // async match test: // case _: ... self.add_error( @@ -2890,7 +3023,7 @@ impl<'src> Parser<'src> { let star_range = parser.current_token_range(); parser.bump(TokenKind::Star); - if parser.at(TokenKind::Name) { + if parser.at_name_or_soft_keyword() { let param = parser.parse_parameter(param_start, function_kind, AllowStarAnnotation::Yes); let param_star_range = parser.node_range(star_range.start()); @@ -3049,7 +3182,7 @@ impl<'src> Parser<'src> { last_keyword_only_separator_range = None; } - TokenKind::Name => { + _ if parser.at_name_or_soft_keyword() => { let param = parser.parse_parameter_with_default(param_start, function_kind); // TODO(dhruvmanila): Pyright seems to only highlight the first non-default argument @@ -3386,6 +3519,122 @@ impl<'src> Parser<'src> { } } + /// Classify the `match` soft keyword token. + /// + /// # Panics + /// + /// If the parser isn't positioned at a `match` token. + fn classify_match_token(&mut self) -> MatchTokenKind { + assert_eq!(self.current_token_kind(), TokenKind::Match); + + let (first, second) = self.peek2(); + + match first { + // test_ok match_classify_as_identifier_1 + // match not in case + TokenKind::Not if second == TokenKind::In => MatchTokenKind::Identifier, + + // test_ok match_classify_as_keyword_1 + // match foo: + // case _: ... + // match 1: + // case _: ... + // match 1.0: + // case _: ... + // match 1j: + // case _: ... + // match "foo": + // case _: ... + // match f"foo {x}": + // case _: ... + // match {1, 2}: + // case _: ... + // match ~foo: + // case _: ... + // match ...: + // case _: ... + // match not foo: + // case _: ... + // match await foo(): + // case _: ... + // match lambda foo: foo: + // case _: ... + + // test_err match_classify_as_keyword + // match yield foo: + // case _: ... + TokenKind::Name + | TokenKind::Int + | TokenKind::Float + | TokenKind::Complex + | TokenKind::String + | TokenKind::FStringStart + | TokenKind::Lbrace + | TokenKind::Tilde + | TokenKind::Ellipsis + | TokenKind::Not + | TokenKind::Await + | TokenKind::Yield + | TokenKind::Lambda => MatchTokenKind::Keyword, + + // test_ok match_classify_as_keyword_or_identifier + // match (1, 2) # Identifier + // match (1, 2): # Keyword + // case _: ... + // match [1:] # Identifier + // match [1, 2]: # Keyword + // case _: ... + // match * foo # Identifier + // match - foo # Identifier + // match -foo: # Keyword + // case _: ... + + // test_err match_classify_as_keyword_or_identifier + // match *foo: # Keyword + // case _: ...
+ TokenKind::Lpar + | TokenKind::Lsqb + | TokenKind::Star + | TokenKind::Plus + | TokenKind::Minus => MatchTokenKind::KeywordOrIdentifier, + + _ => { + if first.is_soft_keyword() || first.is_singleton() { + // test_ok match_classify_as_keyword_2 + // match match: + // case _: ... + // match case: + // case _: ... + // match type: + // case _: ... + // match None: + // case _: ... + // match True: + // case _: ... + // match False: + // case _: ... + MatchTokenKind::Keyword + } else { + // test_ok match_classify_as_identifier_2 + // match + // match != foo + // (foo, match) + // [foo, match] + // {foo, match} + // match; + // match: int + // match, + // match.foo + // match / foo + // match << foo + // match and foo + // match is not foo + MatchTokenKind::Identifier + } + } + } + } + /// Specialized [`Parser::parse_list_into_vec`] for parsing a sequence of clauses. /// /// The difference is that the parser only continues parsing for as long as it sees the token @@ -3477,6 +3726,46 @@ impl Display for Clause { } } +/// The classification of the `match` token. +/// +/// The `match` token is a soft keyword, which means that, depending on the context, it can be used as a +/// keyword or an identifier. +#[derive(Debug, Clone, Copy)] +enum MatchTokenKind { + /// The `match` token is used as a keyword. + /// + /// For example: + /// ```python + /// match foo: + /// case _: + /// pass + /// ``` + Keyword, + + /// The `match` token is used as an identifier. + /// + /// For example: + /// ```python + /// match.values() + /// match is None + /// ``` + Identifier, + + /// The `match` token is used as either a keyword or an identifier. + /// + /// For example: + /// ```python + /// # Used as a keyword + /// match [x, y]: + /// case [1, 2]: + /// pass + /// + /// # Used as an identifier + /// match[x] + /// ``` + KeywordOrIdentifier, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum WithItemParsingState { /// The parser is currently parsing a with item without any ambiguity. diff --git a/crates/ruff_python_parser/src/parser/tests.rs b/crates/ruff_python_parser/src/parser/tests.rs index ec23d01d277f5..09bc41e7f7b66 100644 --- a/crates/ruff_python_parser/src/parser/tests.rs +++ b/crates/ruff_python_parser/src/parser/tests.rs @@ -1,4 +1,4 @@ -use crate::{lex, parse, parse_expression, parse_suite, parse_tokens, Mode}; +use crate::{parse, parse_expression, parse_module, Mode}; #[test] fn test_modes() { @@ -45,23 +45,23 @@ fn test_expr_mode_valid_syntax() { let source = "first "; - let expr = parse_expression(source).unwrap(); + let parsed = parse_expression(source).unwrap(); - insta::assert_debug_snapshot!(expr); + insta::assert_debug_snapshot!(parsed.expr()); } #[test] fn test_unicode_aliases() { // https://github.com/RustPython/RustPython/issues/4566 let source = r#"x = "\N{BACKSPACE}another cool trick""#; - let parse_ast = parse_suite(source).unwrap(); + let suite = parse_module(source).unwrap().into_suite(); - insta::assert_debug_snapshot!(parse_ast); + insta::assert_debug_snapshot!(suite); } #[test] fn test_ipython_escape_commands() { - let parse_ast = parse( + let parsed = parse( r" # Normal Python code ( @@ -132,21 +132,5 @@ foo.bar[0].baz[2].egg??
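Condensed, the `classify_match_token` table above reduces to a three-way decision on the one or two tokens that follow `match`. A compact, hypothetical restatement using strings as stand-ins for token kinds (only a few representative kinds are modeled; in the real table, names, literals, soft keywords, and singletons also classify as `Keyword`):

```rust
#[derive(Debug, PartialEq)]
enum MatchTokenKind {
    Keyword,
    Identifier,
    KeywordOrIdentifier,
}

fn classify_match(first: &str, second: &str) -> MatchTokenKind {
    match (first, second) {
        // `match not in case` can only be the `not in` comparison.
        ("not", "in") => MatchTokenKind::Identifier,
        // Tokens that can only begin a subject expression.
        ("not", _) | ("lambda", _) | ("~", _) | ("...", _) => MatchTokenKind::Keyword,
        // Tokens that could begin a subject *or* continue an expression:
        // `match [x]:` vs. `match[x]`, `match (x):` vs. `match(x)`, etc.
        ("(", _) | ("[", _) | ("*", _) | ("+", _) | ("-", _) => {
            MatchTokenKind::KeywordOrIdentifier
        }
        // Operators, `.`, `=`, and friends: `match` is an ordinary name.
        _ => MatchTokenKind::Identifier,
    }
}

fn main() {
    assert_eq!(classify_match("not", "in"), MatchTokenKind::Identifier);
    assert_eq!(classify_match("not", "foo"), MatchTokenKind::Keyword);
    assert_eq!(classify_match("[", "1"), MatchTokenKind::KeywordOrIdentifier);
    assert_eq!(classify_match("<<", "foo"), MatchTokenKind::Identifier);
}
```

Only the `KeywordOrIdentifier` bucket pays the cost of the checkpoint-and-rewind attempt; the other two are decided from two tokens of lookahead alone.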
Mode::Ipython, ) .unwrap(); - insta::assert_debug_snapshot!(parse_ast); -} - -#[test] -fn test_ipython_escape_command_parse_error() { - let source = r" -a = 1 -%timeit a == 1 - " - .trim(); - let lxr = lex(source, Mode::Ipython); - let parse_err = parse_tokens(lxr.collect(), source, Mode::Module).unwrap_err(); - assert_eq!( - parse_err.to_string(), - "IPython escape commands are only allowed in `Mode::Ipython` at byte range 6..20" - .to_string() - ); + insta::assert_debug_snapshot!(parsed.syntax()); } diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__assignment.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__assignment.snap index c4232bccf1f98..248f1eab3feb0 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__assignment.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__assignment.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - Name { - name: "a_variable", - }, + Name( + "a_variable", + ), 0..10, ), ( @@ -14,9 +16,9 @@ expression: lex_source(source) 11..12, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 13..15, ), ( @@ -24,9 +26,9 @@ expression: lex_source(source) 16..17, ), ( - Int { - value: 2, - }, + Int( + 2, + ), 18..19, ), ( @@ -34,9 +36,9 @@ expression: lex_source(source) 19..20, ), ( - Int { - value: 0, - }, + Int( + 0, + ), 20..21, ), ( @@ -44,3 +46,4 @@ expression: lex_source(source) 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_mac_eol.snap index 5a0e7933e989a..9e3a9cee5a8a8 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_mac_eol.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: comment_until_eol(MAC_EOL) --- +## Tokens +``` [ ( - Int { - value: 123, - }, + Int( + 123, + ), 0..3, ), ( - Comment( - "# Foo", - ), + Comment, 5..10, ), ( @@ -20,9 +20,9 @@ expression: comment_until_eol(MAC_EOL) 10..11, ), ( - Int { - value: 456, - }, + Int( + 456, + ), 11..14, ), ( @@ -30,3 +30,4 @@ expression: comment_until_eol(MAC_EOL) 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_unix_eol.snap index 3fdbd4c10f384..6b884348b1f3d 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_unix_eol.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: comment_until_eol(UNIX_EOL) --- +## Tokens +``` [ ( - Int { - value: 123, - }, + Int( + 123, + ), 0..3, ), ( - Comment( - "# Foo", - ), + Comment, 5..10, ), ( @@ -20,9 +20,9 @@ expression: comment_until_eol(UNIX_EOL) 10..11, ), ( - Int { - value: 456, - }, + Int( + 456, + ), 11..14, ), ( @@ -30,3 +30,4 @@ expression: comment_until_eol(UNIX_EOL) 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_windows_eol.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_windows_eol.snap index fcf5cfcb80a16..fa240892394c2 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__comment_until_windows_eol.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: comment_until_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - Int { - value: 123, - }, + Int( + 123, + ), 0..3, ), ( - Comment( - "# Foo", - ), + Comment, 5..10, ), ( @@ -20,9 +20,9 @@ expression: comment_until_eol(WINDOWS_EOL) 10..12, ), ( - Int { - value: 456, - }, + Int( + 456, + ), 12..15, ), ( @@ -30,3 +30,4 @@ expression: comment_until_eol(WINDOWS_EOL) 15..15, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__dedent_after_whitespace.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__dedent_after_whitespace.snap new file mode 100644 index 0000000000000..698e077bffe75 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__dedent_after_whitespace.snap @@ -0,0 +1,79 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +## Tokens +``` +[ + ( + If, + 0..2, + ), + ( + Name( + "first", + ), + 3..8, + ), + ( + Colon, + 8..9, + ), + ( + Newline, + 9..10, + ), + ( + Indent, + 10..14, + ), + ( + If, + 14..16, + ), + ( + Name( + "second", + ), + 17..23, + ), + ( + Colon, + 23..24, + ), + ( + Newline, + 24..25, + ), + ( + Indent, + 25..33, + ), + ( + Pass, + 33..37, + ), + ( + Newline, + 37..38, + ), + ( + Dedent, + 42..42, + ), + ( + Name( + "foo", + ), + 42..45, + ), + ( + Newline, + 45..46, + ), + ( + Dedent, + 46..46, + ), +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_mac_eol.snap index 498d3cc42641e..f877c10beee72 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_mac_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_eol(MAC_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_eol(MAC_EOL) 12..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 15..16, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_eol(MAC_EOL) 21..27, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 28..30, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_eol(MAC_EOL) 32..32, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_mac_eol.snap index a27a11a6cb445..7c2082732f60d 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_mac_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_tabs_eol(MAC_EOL) --- +## Tokens +``` [ ( Def, 
0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_tabs_eol(MAC_EOL) 12..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 15..16, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_tabs_eol(MAC_EOL) 22..28, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 29..31, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_tabs_eol(MAC_EOL) 33..33, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_unix_eol.snap index 69fe4a3cce947..214b1734108d3 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_unix_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_tabs_eol(UNIX_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_tabs_eol(UNIX_EOL) 12..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 15..16, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_tabs_eol(UNIX_EOL) 22..28, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 29..31, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_tabs_eol(UNIX_EOL) 33..33, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_windows_eol.snap index f07534c23e1b3..79bb8e6f48e9d 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_tabs_windows_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_tabs_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_tabs_eol(WINDOWS_EOL) 13..15, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 16..17, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_tabs_eol(WINDOWS_EOL) 25..31, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 32..34, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_tabs_eol(WINDOWS_EOL) 38..38, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_unix_eol.snap index 49b3db404d171..a01a3dd252957 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_unix_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_eol(UNIX_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_eol(UNIX_EOL) 12..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 15..16, ), ( @@ -64,9 +66,9 @@ expression: 
double_dedent_with_eol(UNIX_EOL) 21..27, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 28..30, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_eol(UNIX_EOL) 32..32, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_windows_eol.snap index 2ebebf4483099..2f84b6b91a9d2 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__double_dedent_with_windows_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: double_dedent_with_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: double_dedent_with_eol(WINDOWS_EOL) 13..15, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 16..17, ), ( @@ -64,9 +66,9 @@ expression: double_dedent_with_eol(WINDOWS_EOL) 24..30, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 31..33, ), ( @@ -86,3 +88,4 @@ expression: double_dedent_with_eol(WINDOWS_EOL) 37..37, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__emoji_identifier.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__emoji_identifier.snap new file mode 100644 index 0000000000000..0a9bec6cf95b6 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__emoji_identifier.snap @@ -0,0 +1,24 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: "lex_invalid(source, Mode::Module)" +--- +## Tokens +``` +[ + ( + Unknown, + 0..4, + ), +] +``` +## Errors +``` +[ + LexicalError { + error: UnrecognizedToken { + tok: '🐦', + }, + location: 0..4, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap index 9733379a7b7af..2e6c623f951b1 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap @@ -2,115 +2,97 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 2..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - String { - value: "", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "", + ), 4..6, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 7..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 9..10, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 11..13, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 13..14, + TokenFlags( + F_STRING, + ), ), ( - String { - value: "", - flags: AnyStringFlags { - prefix: 
Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "", + ), 15..17, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - ), + FStringStart, 18..22, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 22..25, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - ), + FStringStart, 26..30, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 30..33, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Newline, 33..33, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_ipython_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_ipython_escape_command.snap index 133690977b443..848e576a83c3c 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_ipython_escape_command.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_ipython_escape_command.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -103,3 +105,4 @@ expression: lex_jupyter_source(source) 20..20, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__escape_unicode_name.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__escape_unicode_name.snap index 34fd624fa6a97..baa500ccb78b2 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__escape_unicode_name.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__escape_unicode_name.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - String { - value: "\\N{EN SPACE}", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "\\N{EN SPACE}", + ), 0..14, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( Newline, 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap index cdc24e203e07d..cd6778a73adad 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap @@ -2,40 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "normal ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "normal ", + ), 2..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 9..10, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 10..13, ), ( @@ -43,26 +36,22 @@ expression: lex_source(source) 13..14, ), ( - FStringMiddle { - value: " {another} ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - 
triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " {another} ", + ), 14..27, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 27..28, ), ( - Name { - name: "bar", - }, + Name( + "bar", + ), 28..31, ), ( @@ -70,26 +59,22 @@ expression: lex_source(source) 31..32, ), ( - FStringMiddle { - value: " {", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " {", + ), 32..35, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 35..36, ), ( - Name { - name: "three", - }, + Name( + "three", + ), 36..41, ), ( @@ -97,24 +82,24 @@ expression: lex_source(source) 41..42, ), ( - FStringMiddle { - value: "}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "}", + ), 42..44, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 44..45, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 45..45, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap index 115fc4991d915..8eb4842ebb8e9 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap @@ -2,40 +2,31 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - ), + FStringStart, 0..4, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "\n# not a comment ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + FStringMiddle( + "\n# not a comment ", + ), 4..21, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Lbrace, 21..22, ), ( - Comment( - "# comment {", - ), + Comment, 23..34, ), ( @@ -43,9 +34,9 @@ expression: lex_source(source) 34..35, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 39..40, ), ( @@ -57,24 +48,24 @@ expression: lex_source(source) 41..42, ), ( - FStringMiddle { - value: " # not a comment\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + FStringMiddle( + " # not a comment\n", + ), 42..59, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 59..62, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Newline, 62..62, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap index 9e237274e1287..bcda1c925b961 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap @@ -2,27 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + 
DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 2..3, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 3..4, ), ( @@ -30,9 +27,9 @@ expression: lex_source(source) 4..5, ), ( - Name { - name: "s", - }, + Name( + "s", + ), 5..6, ), ( @@ -40,26 +37,22 @@ expression: lex_source(source) 6..7, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 7..8, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 8..9, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 9..10, ), ( @@ -71,9 +64,9 @@ expression: lex_source(source) 11..12, ), ( - Name { - name: "r", - }, + Name( + "r", + ), 12..13, ), ( @@ -81,26 +74,22 @@ expression: lex_source(source) 13..14, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 14..15, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 15..16, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 16..17, ), ( @@ -108,41 +97,37 @@ expression: lex_source(source) 17..18, ), ( - FStringMiddle { - value: ".3f!r", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + ".3f!r", + ), 18..23, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 23..24, ), ( - FStringMiddle { - value: " {x!r}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " {x!r}", + ), 24..32, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 32..33, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 33..33, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap index d42ff61b5ed99..b581901ed9421 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap @@ -2,40 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "\\", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\", + ), 2..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 3..4, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 4..5, ), ( @@ -43,26 +36,22 @@ expression: lex_source(source) 5..6, ), ( - FStringMiddle { - value: "\\\"\\", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\\"\\", + ), 6..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 9..10, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 10..11, ), ( @@ -74,24 +63,24 @@ expression: lex_source(source) 12..13, ), ( - FStringMiddle { - value: " \\\"\\\"\\\n end", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " \\\"\\\"\\\n end", + ), 13..24, + TokenFlags( 
+ DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 24..25, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 25..25, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_braces.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_braces.snap index e4cc748fa5495..d8d007d560fb1 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_braces.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_braces.snap @@ -2,40 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "\\", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "\\", + ), 2..3, + TokenFlags( + F_STRING, + ), ), ( Lbrace, 3..4, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -45,40 +38,34 @@ expression: lex_source(source) ( FStringEnd, 8..9, + TokenFlags( + F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 10..12, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "\\\\", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "\\\\", + ), 12..14, + TokenFlags( + F_STRING, + ), ), ( Lbrace, 14..15, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 15..18, ), ( @@ -88,67 +75,59 @@ expression: lex_source(source) ( FStringEnd, 19..20, + TokenFlags( + F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 21..23, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "\\{foo}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "\\{foo}", + ), 23..31, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 31..32, + TokenFlags( + F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 33..35, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "\\\\{foo}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "\\\\{foo}", + ), 35..44, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 44..45, + TokenFlags( + F_STRING, + ), ), ( Newline, 45..45, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap index af0f7391c5ada..e92513e5bb596 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap @@ -2,44 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw 
{ - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringMiddle { - value: "\\", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\", + ), 3..4, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Lbrace, 4..5, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 5..6, ), ( @@ -47,28 +36,22 @@ expression: lex_source(source) 6..7, ), ( - FStringMiddle { - value: "\\\"\\", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\\"\\", + ), 7..10, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Lbrace, 10..11, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 11..12, ), ( @@ -80,26 +63,24 @@ expression: lex_source(source) 13..14, ), ( - FStringMiddle { - value: " \\\"\\\"\\\n end", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " \\\"\\\"\\\n end", + ), 14..25, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 25..26, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Newline, 26..26, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap index c02888312a41c..fef1db4f33e69 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap @@ -2,31 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "first ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "first ", + ), 2..8, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -37,9 +30,9 @@ expression: lex_source(source) 9..10, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 14..15, ), ( @@ -55,9 +48,9 @@ expression: lex_source(source) 25..26, ), ( - Name { - name: "y", - }, + Name( + "y", + ), 38..39, ), ( @@ -69,24 +62,24 @@ expression: lex_source(source) 40..41, ), ( - FStringMiddle { - value: " second", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " second", + ), 41..48, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 48..49, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 49..49, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap index d9a0765595b1f..0393d76865383 100644 --- 
a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap @@ -2,127 +2,99 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - ), + FStringStart, 0..4, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "\nhello\n world\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + FStringMiddle( + "\nhello\n world\n", + ), 4..21, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 21..24, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - ), + FStringStart, 25..29, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "\n world\nhello\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "\n world\nhello\n", + ), 29..46, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 46..49, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 50..52, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "some ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "some ", + ), 52..57, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 57..58, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - ), + FStringStart, 58..62, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "multiline\nallowed ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + FStringMiddle( + "multiline\nallowed ", + ), 62..80, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Lbrace, 80..81, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 81..82, ), ( @@ -132,30 +104,33 @@ expression: lex_source(source) ( FStringEnd, 83..86, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Rbrace, 86..87, ), ( - FStringMiddle { - value: " string", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " string", + ), 87..94, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 94..95, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 95..95, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap index 2ee532ba5553e..0f729e45f8b93 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap +++ 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "\\N{BULLET} normal \\Nope \\N", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\N{BULLET} normal \\Nope \\N", + ), 2..28, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 28..29, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 29..29, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap index 6de98ec5266e1..760a7153741bb 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap @@ -2,44 +2,33 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringMiddle { - value: "\\N", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "\\N", + ), 3..5, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Lbrace, 5..6, ), ( - Name { - name: "BULLET", - }, + Name( + "BULLET", + ), 6..12, ), ( @@ -47,26 +36,24 @@ expression: lex_source(source) 12..13, ), ( - FStringMiddle { - value: " normal", - flags: AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " normal", + ), 13..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 20..21, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( Newline, 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap index 02ff537d09842..3e82eadf77de6 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap @@ -2,69 +2,53 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "foo ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "foo ", + ), 2..6, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 6..7, ), ( 
- FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 7..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "bar ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "bar ", + ), 9..13, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 13..14, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 14..15, ), ( @@ -72,25 +56,20 @@ expression: lex_source(source) 16..17, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 18..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 20..21, ), ( - Name { - name: "wow", - }, + Name( + "wow", + ), 21..24, ), ( @@ -100,6 +79,9 @@ expression: lex_source(source) ( FStringEnd, 25..26, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, @@ -108,135 +90,112 @@ expression: lex_source(source) ( FStringEnd, 27..28, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 28..29, ), ( - FStringMiddle { - value: " baz", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " baz", + ), 29..33, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 33..34, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 35..37, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "foo ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "foo ", + ), 37..41, + TokenFlags( + F_STRING, + ), ), ( Lbrace, 41..42, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 42..44, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "bar", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "bar", + ), 44..47, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 47..48, + TokenFlags( + F_STRING, + ), ), ( Rbrace, 48..49, ), ( - FStringMiddle { - value: " some ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + " some ", + ), 49..55, + TokenFlags( + F_STRING, + ), ), ( Lbrace, 55..56, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 56..58, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "another", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "another", + ), 58..65, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 65..66, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, @@ -245,9 +204,13 @@ expression: lex_source(source) ( FStringEnd, 67..68, + TokenFlags( + F_STRING, + ), ), ( Newline, 68..68, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap index 
8654030cc6a9e..1212187d91f06 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap @@ -2,18 +2,15 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -26,60 +23,48 @@ expression: lex_source(source) ( FStringEnd, 4..5, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 6..8, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "{}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "{}", + ), 8..12, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 12..13, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 14..16, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 16..17, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -92,31 +77,25 @@ expression: lex_source(source) ( FStringEnd, 19..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 21..23, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "{", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "{", + ), 23..25, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -127,75 +106,59 @@ expression: lex_source(source) 26..27, ), ( - FStringMiddle { - value: "}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "}", + ), 27..29, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 29..30, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 31..33, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "{{}}", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "{{}}", + ), 33..41, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 41..42, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 43..45, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 45..46, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + 
), ), ( Lbrace, @@ -206,17 +169,13 @@ expression: lex_source(source) 47..48, ), ( - FStringMiddle { - value: " {} {", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " {} {", + ), 48..56, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -227,24 +186,24 @@ expression: lex_source(source) 57..58, ), ( - FStringMiddle { - value: "} {{}} ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "} {{}} ", + ), 58..71, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 71..72, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 72..72, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap index faf6cbe440ee3..f134fe8d99a10 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap @@ -2,185 +2,152 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 2..3, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 4..6, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 6..7, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 8..11, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 11..12, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 13..16, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 16..17, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: true, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 18..21, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( FStringEnd, 21..22, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: true, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 23..26, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( FStringEnd, 26..27, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 28..31, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 31..32, + TokenFlags( + DOUBLE_QUOTES | F_STRING | 
RAW_STRING_LOWERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: false, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 33..36, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( FStringEnd, 36..37, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_LOWERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: true, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 38..41, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( FStringEnd, 41..42, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Raw { - uppercase_r: true, - }, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 43..46, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( FStringEnd, 46..47, + TokenFlags( + DOUBLE_QUOTES | F_STRING | RAW_STRING_UPPERCASE, + ), ), ( Newline, 47..47, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_mac_eol.snap index c45fb72c945b1..bb5f4d7597dd5 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_mac_eol.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: fstring_single_quote_escape_eol(MAC_EOL) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "text \\\r more text", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "text \\\r more text", + ), 2..19, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 19..20, + TokenFlags( + F_STRING, + ), ), ( Newline, 20..20, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_unix_eol.snap index 1a27f26ecb0dd..ace6850825be5 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_unix_eol.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: fstring_single_quote_escape_eol(UNIX_EOL) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "text \\\n more text", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "text \\\n more text", + ), 2..19, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 19..20, + TokenFlags( + F_STRING, + ), ), ( Newline, 20..20, ), ] +``` diff --git 
a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_windows_eol.snap index 99edd6be19e25..a3b11f3d6b896 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_single_quote_escape_windows_eol.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: fstring_single_quote_escape_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "text \\\r\n more text", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "text \\\r\n more text", + ), 2..20, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 20..21, + TokenFlags( + F_STRING, + ), ), ( Newline, 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap index d9c595f8f08a5..8157de849c983 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap @@ -2,27 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 2..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 3..6, ), ( @@ -34,26 +31,22 @@ expression: lex_source(source) 7..8, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 8..9, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 9..10, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 10..11, ), ( @@ -65,9 +58,9 @@ expression: lex_source(source) 12..13, ), ( - Name { - name: "s", - }, + Name( + "s", + ), 13..14, ), ( @@ -75,43 +68,35 @@ expression: lex_source(source) 14..15, ), ( - FStringMiddle { - value: ".3f", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + ".3f", + ), 15..18, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 18..19, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 19..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 20..21, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 21..22, ), ( @@ -119,26 +104,22 @@ expression: lex_source(source) 22..23, ), ( - FStringMiddle { - value: ".", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + ".", + ), 23..24, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 
24..25, ), ( - Name { - name: "y", - }, + Name( + "y", + ), 25..26, ), ( @@ -146,50 +127,35 @@ expression: lex_source(source) 26..27, ), ( - FStringMiddle { - value: "f", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "f", + ), 27..28, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 28..29, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 29..30, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 30..31, ), ( - String { - value: "", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "", + ), 31..33, ), ( @@ -197,26 +163,22 @@ expression: lex_source(source) 33..34, ), ( - FStringMiddle { - value: "*^", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "*^", + ), 34..36, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 36..37, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 37..38, ), ( @@ -228,9 +190,9 @@ expression: lex_source(source) 39..40, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 40..41, ), ( @@ -246,26 +208,22 @@ expression: lex_source(source) 43..44, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 44..45, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 45..46, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 46..47, ), ( @@ -281,9 +239,9 @@ expression: lex_source(source) 49..50, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 50..51, ), ( @@ -295,9 +253,9 @@ expression: lex_source(source) 52..53, ), ( - Name { - name: "pop", - }, + Name( + "pop", + ), 53..56, ), ( @@ -319,9 +277,13 @@ expression: lex_source(source) ( FStringEnd, 60..61, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 61..61, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap index 3d89467bcdfc1..7c749c92e7782 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap @@ -2,31 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( - FStringMiddle { - value: "foo ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "foo ", + ), 2..6, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -37,9 +30,9 @@ expression: lex_source(source) 7..8, ), ( - Name { - name: "pwd", - }, + Name( + "pwd", + ), 8..11, ), ( @@ -47,24 +40,24 @@ expression: lex_source(source) 11..12, ), ( - FStringMiddle { - value: " bar", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + 
FStringMiddle( + " bar", + ), 12..16, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( FStringEnd, 16..17, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 17..17, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_lambda_expression.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_lambda_expression.snap index 5e63f7f917832..5fde2adc2c307 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_lambda_expression.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_lambda_expression.snap @@ -2,18 +2,15 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -24,9 +21,9 @@ expression: lex_source(source) 3..9, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 10..11, ), ( @@ -38,9 +35,9 @@ expression: lex_source(source) 12..13, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 13..14, ), ( @@ -54,22 +51,20 @@ expression: lex_source(source) ( FStringEnd, 16..17, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 17..18, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 18..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -84,9 +79,9 @@ expression: lex_source(source) 22..28, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 29..30, ), ( @@ -98,9 +93,9 @@ expression: lex_source(source) 31..32, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 32..33, ), ( @@ -118,9 +113,13 @@ expression: lex_source(source) ( FStringEnd, 36..37, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 37..37, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap index a4a46dfacf6a8..4e46987c1242e 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap @@ -2,31 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - ), + FStringStart, 0..4, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 4..6, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Lbrace, @@ -37,9 +30,9 @@ expression: lex_source(source) 7..8, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 12..13, ), ( @@ -47,67 +40,53 @@ expression: lex_source(source) 13..14, ), ( - FStringMiddle { - value: "d\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "d\n", + ), 14..16, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), 
( Rbrace, 16..17, ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 17..19, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 19..22, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Newline, 22..23, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - ), + FStringStart, 23..27, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 27..29, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Lbrace, @@ -118,9 +97,9 @@ expression: lex_source(source) 30..31, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 35..36, ), ( @@ -128,67 +107,53 @@ expression: lex_source(source) 36..37, ), ( - FStringMiddle { - value: "a\n b\n c\n", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "a\n b\n c\n", + ), 37..61, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Rbrace, 61..62, ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: true, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 62..64, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( FStringEnd, 64..67, + TokenFlags( + TRIPLE_QUOTED_STRING | F_STRING, + ), ), ( Newline, 67..68, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 68..70, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 70..72, + TokenFlags( + F_STRING, + ), ), ( Lbrace, @@ -199,9 +164,9 @@ expression: lex_source(source) 73..74, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 78..79, ), ( @@ -209,17 +174,13 @@ expression: lex_source(source) 79..80, ), ( - FStringMiddle { - value: "d", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "d", + ), 80..81, + TokenFlags( + F_STRING, + ), ), ( NonLogicalNewline, @@ -230,50 +191,40 @@ expression: lex_source(source) 82..83, ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 83..85, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 85..86, + TokenFlags( + F_STRING, + ), ), ( Newline, 86..87, ), ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 87..89, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 89..91, + TokenFlags( + F_STRING, + ), ), ( Lbrace, @@ -284,9 +235,9 @@ expression: lex_source(source) 92..93, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 97..98, ), ( @@ -294,26 +245,22 @@ expression: lex_source(source) 98..99, ), ( - FStringMiddle { - value: "a", - flags: AnyStringFlags { - prefix: Format( 
- Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "a", + ), 99..100, + TokenFlags( + F_STRING, + ), ), ( NonLogicalNewline, 100..101, ), ( - Name { - name: "b", - }, + Name( + "b", + ), 109..110, ), ( @@ -325,24 +272,24 @@ expression: lex_source(source) 111..112, ), ( - FStringMiddle { - value: "__", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "__", + ), 112..114, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 114..115, + TokenFlags( + F_STRING, + ), ), ( Newline, 115..116, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap index c013731900a09..900373f25c231 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap @@ -2,27 +2,24 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - ), + FStringStart, 0..2, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 2..3, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 3..4, ), ( @@ -30,34 +27,26 @@ expression: lex_source(source) 4..5, ), ( - FStringMiddle { - value: "=10", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + "=10", + ), 5..8, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Rbrace, 8..9, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 9..10, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -68,9 +57,9 @@ expression: lex_source(source) 11..12, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 12..13, ), ( @@ -78,9 +67,9 @@ expression: lex_source(source) 13..15, ), ( - Int { - value: 10, - }, + Int( + 10, + ), 15..17, ), ( @@ -92,26 +81,22 @@ expression: lex_source(source) 18..19, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 19..20, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, 20..21, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 21..22, ), ( @@ -123,9 +108,9 @@ expression: lex_source(source) 23..24, ), ( - Name { - name: "y", - }, + Name( + "y", + ), 24..25, ), ( @@ -133,9 +118,9 @@ expression: lex_source(source) 25..27, ), ( - Int { - value: 10, - }, + Int( + 10, + ), 27..29, ), ( @@ -147,17 +132,13 @@ expression: lex_source(source) 30..31, ), ( - FStringMiddle { - value: " ", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + FStringMiddle( + " ", + ), 31..32, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Lbrace, @@ -168,9 +149,9 @@ expression: lex_source(source) 33..34, ), ( - Name { - name: "x", - }, + Name( + "x", + ), 34..35, ), ( @@ -178,9 +159,9 @@ expression: lex_source(source) 35..37, ), ( - Int { - value: 10, - }, + Int( + 10, + ), 37..39, ), ( @@ -194,9 +175,13 @@ expression: 
lex_source(source) ( FStringEnd, 41..42, + TokenFlags( + DOUBLE_QUOTES | F_STRING, + ), ), ( Newline, 42..42, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_nul_char.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_nul_char.snap index d612885716593..2620cb6cc86dd 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_nul_char.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_nul_char.snap @@ -2,38 +2,35 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - FStringStart( - AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - ), + FStringStart, 0..2, + TokenFlags( + F_STRING, + ), ), ( - FStringMiddle { - value: "\\0", - flags: AnyStringFlags { - prefix: Format( - Regular, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + FStringMiddle( + "\\0", + ), 2..4, + TokenFlags( + F_STRING, + ), ), ( FStringEnd, 4..5, + TokenFlags( + F_STRING, + ), ), ( Newline, 5..5, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_mac_eol.snap index 96de2fd3924ad..be043b9151f15 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_mac_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: indentation_with_eol(MAC_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: indentation_with_eol(MAC_EOL) 15..21, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 22..24, ), ( @@ -56,3 +58,4 @@ expression: indentation_with_eol(MAC_EOL) 26..26, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_unix_eol.snap index c680d32089fc0..7f92d8a8df78a 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_unix_eol.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: indentation_with_eol(UNIX_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: indentation_with_eol(UNIX_EOL) 15..21, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 22..24, ), ( @@ -56,3 +58,4 @@ expression: indentation_with_eol(UNIX_EOL) 26..26, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_windows_eol.snap index acd7bc7f68db0..e7c4cdb3f0519 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__indentation_with_windows_eol.snap @@ -2,15 +2,17 @@ source: 
crates/ruff_python_parser/src/lexer.rs expression: indentation_with_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( Def, 0..3, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 4..7, ), ( @@ -38,9 +40,9 @@ expression: indentation_with_eol(WINDOWS_EOL) 16..22, ), ( - Int { - value: 99, - }, + Int( + 99, + ), 23..25, ), ( @@ -56,3 +58,4 @@ expression: indentation_with_eol(WINDOWS_EOL) 29..29, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap index a0eb10bff4219..189a89b5b8f05 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_big.snap @@ -1,12 +1,28 @@ --- source: crates/ruff_python_parser/src/lexer.rs -expression: tokens +expression: "lex_invalid(source, Mode::Module)" --- -Err( +## Tokens +``` +[ + ( + Unknown, + 0..85, + ), + ( + Newline, + 85..85, + ), +] +``` +## Errors +``` +[ LexicalError { error: OtherError( "Invalid decimal integer literal", ), location: 0..85, }, -) +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap index cf606bd31dab2..50a1a1564fd46 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__invalid_leading_zero_small.snap @@ -1,12 +1,28 @@ --- source: crates/ruff_python_parser/src/lexer.rs -expression: tokens +expression: "lex_invalid(source, Mode::Module)" --- -Err( +## Tokens +``` +[ + ( + Unknown, + 0..3, + ), + ( + Newline, + 3..3, + ), +] +``` +## Errors +``` +[ LexicalError { error: OtherError( "Invalid decimal integer literal", ), location: 0..3, }, -) +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command.snap index dc3d3ec217d19..87c81113472b2 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -125,3 +127,4 @@ expression: lex_jupyter_source(source) 180..180, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_assignment.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_assignment.snap index 07b029d90dc22..32a7e56eea8ef 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_assignment.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_assignment.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( - Name { - name: "pwd", - }, + Name( + "pwd", + ), 0..3, ), ( @@ -25,9 +27,9 @@ expression: 
lex_jupyter_source(source) 10..11, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 11..14, ), ( @@ -46,9 +48,9 @@ expression: lex_jupyter_source(source) 30..31, ), ( - Name { - name: "bar", - }, + Name( + "bar", + ), 31..34, ), ( @@ -67,9 +69,9 @@ expression: lex_jupyter_source(source) 50..51, ), ( - Name { - name: "baz", - }, + Name( + "baz", + ), 51..54, ), ( @@ -88,3 +90,4 @@ expression: lex_jupyter_source(source) 85..85, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_indentation.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_indentation.snap index 1a3d7e016cf33..add0a353641c7 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_indentation.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_indentation.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( If, @@ -39,3 +41,4 @@ expression: lex_jupyter_source(source) 43..43, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_mac_eol.snap index c10f2fb977ba2..913af5854f759 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_mac_eol.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_eol(MAC_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_eol(MAC_EOL) 24..24, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_unix_eol.snap index 938d150f9eae3..4710ed4bc1bcb 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_unix_eol.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_eol(UNIX_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_eol(UNIX_EOL) 24..24, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_windows_eol.snap index c5f5d29dd0eb4..0e9e3bde728cb 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_windows_eol.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs 
expression: ipython_escape_command_line_continuation_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_eol(WINDOWS_EOL) 25..25, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_mac_eol_and_eof.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_mac_eol_and_eof.snap index ffee4a7eecada..1d842b60d1bc4 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_mac_eol_and_eof.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_mac_eol_and_eof.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_with_eol_and_eof(MAC_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_with_eol_and_eof(MAC_EOL) 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_unix_eol_and_eof.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_unix_eol_and_eof.snap index e5227d0a0615e..13d4cf600fa4c 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_unix_eol_and_eof.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_unix_eol_and_eof.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_with_eol_and_eof(UNIX_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_with_eol_and_eof(UNIX_EOL) 14..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_windows_eol_and_eof.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_windows_eol_and_eof.snap index 7950d33905211..b70e615c07d5d 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_windows_eol_and_eof.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_escape_command_line_continuation_with_windows_eol_and_eof.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: ipython_escape_command_line_continuation_with_eol_and_eof(WINDOWS_EOL) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -15,3 +17,4 @@ expression: ipython_escape_command_line_continuation_with_eol_and_eof(WINDOWS_EO 15..15, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_help_end_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_help_end_escape_command.snap index b760410a5ee94..69e13c03bd415 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_help_end_escape_command.snap +++ 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__ipython_help_end_escape_command.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( IpyEscapeCommand { @@ -180,3 +182,4 @@ expression: lex_jupyter_source(source) 132..132, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_empty.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_empty.snap index 34d9125a6312f..8aa9156f9f863 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_empty.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_empty.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(&source) --- +## Tokens +``` [ ( - Int { - value: 99232, - }, + Int( + 99232, + ), 0..5, ), ( - Comment( - "#", - ), + Comment, 7..8, ), ( @@ -20,3 +20,4 @@ expression: lex_source(&source) 8..8, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_long.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_long.snap index 0731cf4711d68..b583477cdb01b 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_long.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_long.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(&source) --- +## Tokens +``` [ ( - Int { - value: 99232, - }, + Int( + 99232, + ), 0..5, ), ( - Comment( - "# foo", - ), + Comment, 7..12, ), ( @@ -20,3 +20,4 @@ expression: lex_source(&source) 12..12, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_single_whitespace.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_single_whitespace.snap index f248b93ef19bf..0c4d6c8372ea1 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_single_whitespace.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_single_whitespace.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(&source) --- +## Tokens +``` [ ( - Int { - value: 99232, - }, + Int( + 99232, + ), 0..5, ), ( - Comment( - "# ", - ), + Comment, 7..9, ), ( @@ -20,3 +20,4 @@ expression: lex_source(&source) 9..9, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_whitespace.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_whitespace.snap index 45939100980f3..f2e37aa31d803 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_whitespace.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__line_comment_whitespace.snap @@ -2,17 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(&source) --- +## Tokens +``` [ ( - Int { - value: 99232, - }, + Int( + 99232, + ), 0..5, ), ( - Comment( - "# ", - ), + Comment, 7..10, ), ( @@ -20,3 +20,4 @@ expression: lex_source(&source) 10..10, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__logical_newline_line_comment.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__logical_newline_line_comment.snap index 944ad882a0d68..151f5dedd49f7 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__logical_newline_line_comment.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__logical_newline_line_comment.snap @@ -2,11 +2,11 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - Comment( - "#Hello", - ), + Comment, 0..6, ), ( @@ -14,9 +14,7 @@ expression: lex_source(source) 6..7, ), ( - Comment( - "#World", - ), + Comment, 7..13, ), ( @@ -24,3 +22,4 @@ expression: lex_source(source) 13..14, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__match_softkeyword_in_notebook.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__match_softkeyword_in_notebook.snap index 0512714bd466a..d56f39910dcec 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__match_softkeyword_in_notebook.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__match_softkeyword_in_notebook.snap @@ -2,15 +2,17 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_jupyter_source(source) --- +## Tokens +``` [ ( Match, 0..5, ), ( - Name { - name: "foo", - }, + Name( + "foo", + ), 6..9, ), ( @@ -30,9 +32,9 @@ expression: lex_jupyter_source(source) 15..19, ), ( - Name { - name: "bar", - }, + Name( + "bar", + ), 20..23, ), ( @@ -64,3 +66,4 @@ expression: lex_jupyter_source(source) 37..37, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_mac_eol.snap index 0a0a9fb1da3c7..d167752f78baa 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_mac_eol.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: newline_in_brackets_eol(MAC_EOL) --- +## Tokens +``` [ ( - Name { - name: "x", - }, + Name( + "x", + ), 0..1, ), ( @@ -26,9 +28,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 6..7, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 11..12, ), ( @@ -36,9 +38,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 12..13, ), ( - Int { - value: 2, - }, + Int( + 2, + ), 13..14, ), ( @@ -54,9 +56,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 16..17, ), ( - Int { - value: 3, - }, + Int( + 3, + ), 17..18, ), ( @@ -68,9 +70,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 19..20, ), ( - Int { - value: 4, - }, + Int( + 4, + ), 20..21, ), ( @@ -98,9 +100,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 27..28, ), ( - Int { - value: 5, - }, + Int( + 5, + ), 28..29, ), ( @@ -112,9 +114,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 30..31, ), ( - Int { - value: 6, - }, + Int( + 6, + ), 31..32, ), ( @@ -122,9 +124,9 @@ expression: newline_in_brackets_eol(MAC_EOL) 32..33, ), ( - Int { - value: 7, - }, + Int( + 7, + ), 35..36, ), ( @@ -140,3 +142,4 @@ expression: newline_in_brackets_eol(MAC_EOL) 38..39, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_unix_eol.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_unix_eol.snap index c3df5dbd24e4d..6355d419f262f 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_unix_eol.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: newline_in_brackets_eol(UNIX_EOL) --- +## Tokens +``` [ ( - Name { - name: "x", - }, + Name( + "x", + ), 0..1, ), ( @@ -26,9 +28,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 6..7, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 11..12, ), ( @@ -36,9 +38,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 12..13, ), ( - Int { - value: 2, - }, + Int( + 2, + ), 13..14, ), ( @@ -54,9 +56,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 16..17, ), ( - Int { - value: 3, - }, + Int( + 3, + ), 17..18, ), ( @@ -68,9 +70,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 19..20, ), ( - Int { - value: 4, - }, + Int( + 4, + ), 20..21, ), ( @@ -98,9 +100,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 27..28, ), ( - Int { - value: 5, - }, + Int( + 5, + ), 28..29, ), ( @@ -112,9 +114,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 30..31, ), ( - Int { - value: 6, - }, + Int( + 6, + ), 31..32, ), ( @@ -122,9 +124,9 @@ expression: newline_in_brackets_eol(UNIX_EOL) 32..33, ), ( - Int { - value: 7, - }, + Int( + 7, + ), 35..36, ), ( @@ -140,3 +142,4 @@ expression: newline_in_brackets_eol(UNIX_EOL) 38..39, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_windows_eol.snap index 34184c68a9a08..cfcd1f7ea18f4 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__newline_in_brackets_windows_eol.snap @@ -2,11 +2,13 @@ source: crates/ruff_python_parser/src/lexer.rs expression: newline_in_brackets_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - Name { - name: "x", - }, + Name( + "x", + ), 0..1, ), ( @@ -26,9 +28,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 7..9, ), ( - Int { - value: 1, - }, + Int( + 1, + ), 13..14, ), ( @@ -36,9 +38,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 14..15, ), ( - Int { - value: 2, - }, + Int( + 2, + ), 15..16, ), ( @@ -54,9 +56,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 19..20, ), ( - Int { - value: 3, - }, + Int( + 3, + ), 20..21, ), ( @@ -68,9 +70,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 22..24, ), ( - Int { - value: 4, - }, + Int( + 4, + ), 24..25, ), ( @@ -98,9 +100,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 32..34, ), ( - Int { - value: 5, - }, + Int( + 5, + ), 34..35, ), ( @@ -112,9 +114,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 36..38, ), ( - Int { - value: 6, - }, + Int( + 6, + ), 38..39, ), ( @@ -122,9 +124,9 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 39..40, ), ( - Int { - value: 7, - }, + Int( + 7, + ), 43..44, ), ( @@ -140,3 +142,4 @@ expression: newline_in_brackets_eol(WINDOWS_EOL) 46..48, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__non_logical_newline_in_string_continuation.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__non_logical_newline_in_string_continuation.snap index 1096935e0a1a5..48356832bc59a 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__non_logical_newline_in_string_continuation.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__non_logical_newline_in_string_continuation.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( Lpar, @@ -12,16 +14,9 @@ expression: lex_source(source) 1..2, ), ( - String { - value: "a", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "a", + ), 6..9, ), ( @@ -29,16 +24,9 @@ expression: lex_source(source) 9..10, ), ( - String { - value: "b", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "b", + ), 14..17, ), ( @@ -50,29 +38,15 @@ expression: lex_source(source) 18..19, ), ( - String { - value: "c", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "c", + ), 23..26, ), ( - String { - value: "d", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "d", + ), 33..36, ), ( @@ -88,3 +62,4 @@ expression: lex_source(source) 38..38, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__numbers.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__numbers.snap index 92bc661965efe..ee49b4ab2745b 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__numbers.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__numbers.snap @@ -2,59 +2,61 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - Int { - value: 47, - }, + Int( + 47, + ), 0..4, ), ( - Int { - value: 10, - }, + Int( + 10, + ), 5..9, ), ( - Int { - value: 13, - }, + Int( + 13, + ), 10..16, ), ( - Int { - value: 0, - }, + Int( + 0, + ), 17..18, ), ( - Int { - value: 123, - }, + Int( + 123, + ), 19..22, ), ( - Int { - value: 1234567890, - }, + Int( + 1234567890, + ), 23..36, ), ( - Float { - value: 0.2, - }, + Float( + 0.2, + ), 37..40, ), ( - Float { - value: 100.0, - }, + Float( + 100.0, + ), 41..45, ), ( - Float { - value: 2100.0, - }, + Float( + 2100.0, + ), 46..51, ), ( @@ -72,21 +74,21 @@ expression: lex_source(source) 55..59, ), ( - Int { - value: 0, - }, + Int( + 0, + ), 60..63, ), ( - Int { - value: 11051210869376104954, - }, + Int( + 11051210869376104954, + ), 64..82, ), ( - Int { - value: 0x995DC9BBDF1939FA995DC9BBDF1939FA, - }, + Int( + 0x995DC9BBDF1939FA995DC9BBDF1939FA, + ), 83..117, ), ( @@ -94,3 +96,4 @@ expression: lex_source(source) 117..117, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__operators.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__operators.snap index 9da473b1d5050..3a241f6c7c0a0 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__operators.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__operators.snap @@ -2,6 +2,8 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( DoubleSlash, @@ 
-28,3 +30,4 @@ expression: lex_source(source) 10..10, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string.snap index 7b947ef55ffa0..c045c602e2601 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string.snap @@ -2,124 +2,70 @@ source: crates/ruff_python_parser/src/lexer.rs expression: lex_source(source) --- +## Tokens +``` [ ( - String { - value: "double", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "double", + ), 0..8, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( - String { - value: "single", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "single", + ), 9..17, ), ( - String { - value: "can\\'t", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "can\\'t", + ), 18..26, ), ( - String { - value: "\\\\\\\"", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "\\\\\\\"", + ), 27..33, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( - String { - value: "\\t\\r\\n", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "\\t\\r\\n", + ), 34..42, ), ( - String { - value: "\\g", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "\\g", + ), 43..47, ), ( - String { - value: "raw\\'", - flags: AnyStringFlags { - prefix: Regular( - Raw { - uppercase: false, - }, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "raw\\'", + ), 48..56, + TokenFlags( + RAW_STRING_LOWERCASE, + ), ), ( - String { - value: "\\420", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "\\420", + ), 57..63, ), ( - String { - value: "\\200\\0a", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Single, - }, - }, + String( + "\\200\\0a", + ), 64..73, ), ( @@ -127,3 +73,4 @@ expression: lex_source(source) 73..73, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_mac_eol.snap index 062e7563c3011..3df752853be80 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_mac_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: string_continuation_with_eol(MAC_EOL) --- +## Tokens +``` [ ( - String { - value: "abc\\\rdef", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "abc\\\rdef", + ), 0..10, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( Newline, 10..10, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_unix_eol.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_unix_eol.snap index 285b0f72e36ec..e7413d10023d6 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_unix_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: string_continuation_with_eol(UNIX_EOL) --- +## Tokens +``` [ ( - String { - value: "abc\\\ndef", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "abc\\\ndef", + ), 0..10, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( Newline, 10..10, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_windows_eol.snap index d1cbaf6552fad..ac945e5acee61 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__string_continuation_with_windows_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: string_continuation_with_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - String { - value: "abc\\\r\ndef", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: false, - quote_style: Double, - }, - }, + String( + "abc\\\r\ndef", + ), 0..11, + TokenFlags( + DOUBLE_QUOTES, + ), ), ( Newline, 11..11, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap index 648ba0ccda2fb..166877fd0e188 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap @@ -1,66 +1,58 @@ --- source: crates/ruff_python_parser/src/lexer.rs -expression: tokens +expression: "lex_invalid(source, Mode::Module)" --- +## Tokens +``` [ - Ok( - ( - If, - 0..2, - ), + ( + If, + 0..2, ), - Ok( - ( - True, - 3..7, - ), + ( + True, + 3..7, ), - Ok( - ( - Colon, - 7..8, - ), + ( + Colon, + 7..8, ), - Ok( - ( - Newline, - 8..9, - ), + ( + Newline, + 8..9, ), - Ok( - ( - Indent, - 9..13, - ), + ( + Indent, + 9..13, ), - Ok( - ( - Pass, - 13..17, - ), + ( + Pass, + 13..17, ), - Ok( - ( - Newline, - 17..18, - ), + ( + Newline, + 17..18, ), - Err( - LexicalError { - error: IndentationError, - location: 18..20, - }, + ( + Unknown, + 18..20, ), - Ok( - ( - Pass, - 20..24, - ), + ( + Pass, + 20..24, ), - Ok( - ( - Newline, - 24..24, - ), + ( + Newline, + 24..24, ), ] +``` +## Errors +``` +[ + LexicalError { + error: IndentationError, + location: 18..20, + }, +] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_mac_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_mac_eol.snap index 6ab09f4663ffa..6dcccf3fdbbc7 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_mac_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_mac_eol.snap @@ 
-2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: triple_quoted_eol(MAC_EOL) --- +## Tokens +``` [ ( - String { - value: "\r test string\r ", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + String( + "\r test string\r ", + ), 0..21, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING, + ), ), ( Newline, 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_unix_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_unix_eol.snap index 1fd944b34fe5f..70f9c06af3412 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_unix_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_unix_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: triple_quoted_eol(UNIX_EOL) --- +## Tokens +``` [ ( - String { - value: "\n test string\n ", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + String( + "\n test string\n ", + ), 0..21, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING, + ), ), ( Newline, 21..21, ), ] +``` diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_windows_eol.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_windows_eol.snap index 6944efe4bea9a..8dcdd0461c476 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_windows_eol.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__triple_quoted_windows_eol.snap @@ -2,22 +2,21 @@ source: crates/ruff_python_parser/src/lexer.rs expression: triple_quoted_eol(WINDOWS_EOL) --- +## Tokens +``` [ ( - String { - value: "\r\n test string\r\n ", - flags: AnyStringFlags { - prefix: Regular( - Empty, - ), - triple_quoted: true, - quote_style: Double, - }, - }, + String( + "\r\n test string\r\n ", + ), 0..23, + TokenFlags( + DOUBLE_QUOTES | TRIPLE_QUOTED_STRING, + ), ), ( Newline, 23..23, ), ] +``` diff --git a/crates/ruff_python_parser/src/soft_keywords.rs b/crates/ruff_python_parser/src/soft_keywords.rs deleted file mode 100644 index e29781c749752..0000000000000 --- a/crates/ruff_python_parser/src/soft_keywords.rs +++ /dev/null @@ -1,224 +0,0 @@ -use itertools::{Itertools, MultiPeek}; - -use crate::{lexer::LexResult, token::Tok, Mode}; - -/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match` -/// `case`, and `type`). -/// -/// [PEP 634](https://www.python.org/dev/peps/pep-0634/) introduced the `match` and `case` keywords -/// as soft keywords, meaning that they can be used as identifiers (e.g., variable names) in certain -/// contexts. -/// -/// Later, [PEP 695](https://peps.python.org/pep-0695/#generic-type-alias) introduced the `type` -/// soft keyword. -/// -/// This function modifies a token stream to accommodate this change. In particular, it replaces -/// soft keyword tokens with `identifier` tokens if they are used as identifiers. -/// -/// Handling soft keywords in this intermediary pass allows us to simplify both the lexer and -/// `ruff_python_parser`, as neither of them need to be aware of soft keywords. 
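The `match`/`case` handling described in the doc comment above boils down to one scan of the remainder of the logical line, looking for a top-level colon. A minimal, self-contained sketch of that scan, using an illustrative `Kind` enum rather than the crate's actual token types:

```rust
/// Illustrative stand-in for the crate's token kinds; not its real API.
#[allow(dead_code)]
enum Kind {
    Colon,
    Lambda,
    Lpar,
    Rpar,
    Lsqb,
    Rsqb,
    Lbrace,
    Rbrace,
    Name,
    Newline,
}

/// Given the tokens that follow a line-initial `match`/`case`, decide whether
/// the soft keyword starts a statement: the line must contain a colon that is
/// not nested inside brackets, does not belong to a `lambda`, and is not the
/// token immediately after the soft keyword (the annotation case).
fn starts_match_statement(rest_of_line: &[Kind]) -> bool {
    let mut nesting = 0u32;
    let mut first = true;
    let mut seen_lambda = false;
    for kind in rest_of_line {
        match kind {
            Kind::Newline => break,
            Kind::Lambda if nesting == 0 => seen_lambda = true,
            Kind::Colon if nesting == 0 => {
                if seen_lambda {
                    // This colon ends the lambda's parameter list, not the
                    // statement header.
                    seen_lambda = false;
                } else if !first {
                    return true;
                }
            }
            Kind::Lpar | Kind::Lsqb | Kind::Lbrace => nesting += 1,
            Kind::Rpar | Kind::Rsqb | Kind::Rbrace => nesting = nesting.saturating_sub(1),
            _ => {}
        }
        first = false;
    }
    false
}

fn main() {
    // `match x:` -- top-level colon, so `match` is a keyword here.
    assert!(starts_match_statement(&[Kind::Name, Kind::Colon, Kind::Newline]));
    // `match: int` -- the colon is an annotation, so `match` stays a name.
    assert!(!starts_match_statement(&[Kind::Colon, Kind::Name, Kind::Newline]));
}
```

Under this scheme `match x:` lexes `match` as a keyword, while `match = 1` (no top-level colon) and the annotation form `match: int = 1` leave it as an identifier.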
-pub struct SoftKeywordTransformer -where - I: Iterator, -{ - underlying: MultiPeek, - position: Position, -} - -impl SoftKeywordTransformer -where - I: Iterator, -{ - pub fn new(lexer: I, mode: Mode) -> Self { - Self { - underlying: lexer.multipeek(), // spell-checker:ignore multipeek - position: if mode == Mode::Expression { - Position::Other - } else { - Position::Statement - }, - } - } -} - -impl Iterator for SoftKeywordTransformer -where - I: Iterator, -{ - type Item = LexResult; - - #[inline] - fn next(&mut self) -> Option { - let mut next = self.underlying.next(); - if let Some(Ok((tok, range))) = next.as_ref() { - // If the token is a soft keyword e.g. `type`, `match`, or `case`, check if it's - // used as an identifier. We assume every soft keyword use is an identifier unless - // a heuristic is met. - match tok { - // For `match` and `case`, all of the following conditions must be met: - // 1. The token is at the start of a logical line. - // 2. The logical line contains a top-level colon (that is, a colon that is not nested - // inside a parenthesized expression, list, or dictionary). - // 3. The top-level colon is not the immediate sibling of a `match` or `case` token. - // (This is to avoid treating `match` or `case` as identifiers when annotated with - // type hints.) - Tok::Match | Tok::Case => { - if matches!(self.position, Position::Statement) { - let mut nesting = 0; - let mut first = true; - let mut seen_colon = false; - let mut seen_lambda = false; - while let Some(Ok((tok, _))) = self.underlying.peek() { - match tok { - Tok::Newline => break, - Tok::Lambda if nesting == 0 => seen_lambda = true, - Tok::Colon if nesting == 0 => { - if seen_lambda { - seen_lambda = false; - } else if !first { - seen_colon = true; - } - } - Tok::Lpar | Tok::Lsqb | Tok::Lbrace => nesting += 1, - Tok::Rpar | Tok::Rsqb | Tok::Rbrace => nesting -= 1, - _ => {} - } - first = false; - } - if !seen_colon { - next = Some(Ok((soft_to_name(tok), *range))); - } - } else { - next = Some(Ok((soft_to_name(tok), *range))); - } - } - // For `type` all of the following conditions must be met: - // 1. The token is at the start of a logical line. - // 2. The type token is immediately followed by a name token. - // 3. The name token is eventually followed by an equality token. - Tok::Type => { - if matches!( - self.position, - Position::Statement | Position::SimpleStatement - ) { - let mut is_type_alias = false; - if let Some(Ok((tok, _))) = self.underlying.peek() { - if matches!( - tok, - Tok::Name { .. } | - // We treat a soft keyword token following a type token as a - // name to support cases like `type type = int` or `type match = int` - Tok::Type | Tok::Match | Tok::Case - ) { - let mut nesting = 0; - while let Some(Ok((tok, _))) = self.underlying.peek() { - match tok { - Tok::Newline => break, - Tok::Equal if nesting == 0 => { - is_type_alias = true; - break; - } - Tok::Lsqb => nesting += 1, - Tok::Rsqb => nesting -= 1, - // Allow arbitrary content within brackets for now - _ if nesting > 0 => {} - // Exit if unexpected tokens are seen - _ => break, - } - } - } - } - if !is_type_alias { - next = Some(Ok((soft_to_name(tok), *range))); - } - } else { - next = Some(Ok((soft_to_name(tok), *range))); - } - } - _ => (), // Not a soft keyword token - } - } - - // Update the position, to track whether we're at the start of a logical line. - if let Some(lex_result) = next.as_ref() { - if let Ok((tok, _)) = lex_result.as_ref() { - match tok { - Tok::NonLogicalNewline | Tok::Comment { .. 
} => { - // Nothing to do. - } - Tok::Newline | Tok::Indent | Tok::Dedent => { - self.position = Position::Statement; - } - // If we see a semicolon, assume we're at the start of a simple statement, as in: - // ```python - // type X = int; type Y = float - // ``` - Tok::Semi => { - self.position = Position::SimpleStatement; - } - // If we see a colon, and we're not in a nested context, assume we're at the - // start of a simple statement, as in: - // ```python - // class Class: type X = int - // ``` - Tok::Colon if self.position == Position::Other => { - self.position = Position::SimpleStatement; - } - Tok::Lpar | Tok::Lsqb | Tok::Lbrace => { - self.position = if let Position::Nested(depth) = self.position { - Position::Nested(depth.saturating_add(1)) - } else { - Position::Nested(1) - }; - } - Tok::Rpar | Tok::Rsqb | Tok::Rbrace => { - self.position = if let Position::Nested(depth) = self.position { - let depth = depth.saturating_sub(1); - if depth > 0 { - Position::Nested(depth) - } else { - Position::Other - } - } else { - Position::Other - }; - } - _ => { - self.position = Position::Other; - } - } - } - } - - next - } -} - -#[inline] -fn soft_to_name(tok: &Tok) -> Tok { - let name = match tok { - Tok::Match => "match", - Tok::Case => "case", - Tok::Type => "type", - _ => unreachable!("other tokens never reach here"), - }; - Tok::Name { - name: name.to_string().into_boxed_str(), - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Position { - /// The lexer is at the start of a logical line, i.e., the start of a simple or compound statement. - Statement, - /// The lexer is at the start of a simple statement, e.g., a statement following a semicolon - /// or colon, as in: - /// ```python - /// class Class: type X = int - /// ``` - SimpleStatement, - /// The lexer is within brackets, with the given bracket nesting depth. - Nested(u32), - /// The lexer is some other location. 
- Other, -} diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs index bd206d5e46573..3976da33876ee 100644 --- a/crates/ruff_python_parser/src/string.rs +++ b/crates/ruff_python_parser/src/string.rs @@ -469,13 +469,19 @@ pub(crate) fn parse_fstring_literal_element( #[cfg(test)] mod tests { + use ruff_python_ast::Suite; + use crate::lexer::LexicalErrorType; - use crate::{parse_suite, FStringErrorType, ParseErrorType, Suite}; + use crate::{parse_module, FStringErrorType, ParseError, ParseErrorType, Parsed}; const WINDOWS_EOL: &str = "\r\n"; const MAC_EOL: &str = "\r"; const UNIX_EOL: &str = "\n"; + fn parse_suite(source: &str) -> Result { + parse_module(source).map(Parsed::into_suite) + } + fn string_parser_escaped_eol(eol: &str) -> Suite { let source = format!(r"'text \{eol}more text'"); parse_suite(&source).unwrap() @@ -483,73 +489,69 @@ mod tests { #[test] fn test_string_parser_escaped_unix_eol() { - let parse_ast = string_parser_escaped_eol(UNIX_EOL); - insta::assert_debug_snapshot!(parse_ast); + let suite = string_parser_escaped_eol(UNIX_EOL); + insta::assert_debug_snapshot!(suite); } #[test] fn test_string_parser_escaped_mac_eol() { - let parse_ast = string_parser_escaped_eol(MAC_EOL); - insta::assert_debug_snapshot!(parse_ast); + let suite = string_parser_escaped_eol(MAC_EOL); + insta::assert_debug_snapshot!(suite); } #[test] fn test_string_parser_escaped_windows_eol() { - let parse_ast = string_parser_escaped_eol(WINDOWS_EOL); - insta::assert_debug_snapshot!(parse_ast); + let suite = string_parser_escaped_eol(WINDOWS_EOL); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring() { let source = r#"f"{a}{ b }{{foo}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_nested_spec() { let source = r#"f"{foo:{spec}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_not_nested_spec() { let source = r#"f"{foo:spec}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_empty_fstring() { - insta::assert_debug_snapshot!(parse_suite(r#"f"""#,).unwrap()); + let source = r#"f"""#; + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_parse_self_documenting_base() { let source = r#"f"{user=}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_parse_self_documenting_base_more() { let source = r#"f"mix {user=} with text and {second=}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_parse_self_documenting_format() { let source = r#"f"{user=:>10}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } fn parse_fstring_error(source: &str) -> FStringErrorType { @@ -577,240 +579,236 @@ 
mod tests { // error appears after the unexpected `FStringMiddle` token, which is between the // `:` and the `{`. // assert_eq!(parse_fstring_error("f'{lambda x: {x}}'"), LambdaWithoutParentheses); - assert!(parse_suite(r#"f"{class}""#,).is_err()); + assert!(parse_suite(r#"f"{class}""#).is_err()); } #[test] fn test_parse_fstring_not_equals() { let source = r#"f"{1 != 2}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_equals() { let source = r#"f"{42 == 42}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_self_doc_prec_space() { let source = r#"f"{x =}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_self_doc_trailing_space() { let source = r#"f"{x= }""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_yield_expr() { let source = r#"f"{yield}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_string_concat() { let source = "'Hello ' 'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_string_concat_1() { let source = "'Hello ' u'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_string_concat_2() { let source = "u'Hello ' 'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_1() { let source = "'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_2() { let source = "'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_3() { let source = "'Hello ' f'world{\"!\"}'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_f_string_concat_4() { let source = "'Hello ' f'world{\"!\"}' 'again!'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_f_string_concat_1() { let source = "u'Hello ' f'world'"; - let parse_ast = parse_suite(source).unwrap(); - 
insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_u_f_string_concat_2() { let source = "u'Hello ' f'world' '!'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_string_triple_quotes_with_kind() { let source = "u'''Hello, world!'''"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_single_quoted_byte() { // single quote let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_double_quoted_byte() { // double quote let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_escape_char_in_byte_literal() { // backslash does not escape let source = r#"b"omkmok\Xaa""#; // spell-checker:ignore omkmok - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_byte_literal_1() { let source = r"rb'\x1z'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_byte_literal_2() { let source = r"rb'\\'"; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_escape_octet() { let source = r"b'\43a\4\1234'"; - let parse_ast = 
parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_escaped_newline() { let source = r#"f"\n{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_constant_range() { let source = r#"f"aaa{bbb}ccc{ddd}eee""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_unescaped_newline() { let source = r#"f""" {x}""""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_escaped_character() { let source = r#"f"\\{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_raw_fstring() { let source = r#"rf"{x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_triple_quoted_raw_fstring() { let source = r#"rf"""{x}""""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_fstring_line_continuation() { let source = r#"rf"\ {x}""#; - let parse_ast = parse_suite(source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_nested_string_spec() { let source = r#"f"{foo:{''}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_parse_fstring_nested_concatenation_string_spec() { let source = r#"f"{foo:{'' ''}}""#; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } /// #[test] fn test_dont_panic_on_8_in_octal_escape() { let source = r"bold = '\038[1m'"; - let parse_ast = parse_suite(source).unwrap(); - - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(source).unwrap(); + insta::assert_debug_snapshot!(suite); } #[test] fn test_invalid_unicode_literal() { let source = r"'\x1ó34'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -818,7 +816,6 @@ mod tests { fn test_missing_unicode_lbrace_error() { let source = r"'\N '"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -826,7 +823,6 @@ mod tests { fn test_missing_unicode_rbrace_error() { let source = r"'\N{SPACE'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -834,7 +830,6 @@ mod tests { fn test_invalid_unicode_name_error() { let source = r"'\N{INVALID}'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -842,7 +837,6 @@ mod tests { fn test_invalid_byte_literal_error() { let source = 
r"b'123a𝐁c'"; let error = parse_suite(source).unwrap_err(); - insta::assert_debug_snapshot!(error); } @@ -852,8 +846,8 @@ mod tests { #[test] fn $name() { let source = format!(r#""\N{{{0}}}""#, $alias); - let parse_ast = parse_suite(&source).unwrap(); - insta::assert_debug_snapshot!(parse_ast); + let suite = parse_suite(&source).unwrap(); + insta::assert_debug_snapshot!(suite); } )* } diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs index 16ae72b313019..f9f3fe8bb2fd6 100644 --- a/crates/ruff_python_parser/src/token.rs +++ b/crates/ruff_python_parser/src/token.rs @@ -1,4 +1,4 @@ -//! Token type for Python source code created by the lexer and consumed by the `ruff_python_parser`. +//! Token kinds for Python source code created by the lexer and consumed by the `ruff_python_parser`. //! //! This module defines the tokens that the lexer recognizes. The tokens are //! loosely based on the token definitions found in the [CPython source]. @@ -7,482 +7,140 @@ use std::fmt; -use ruff_python_ast::{AnyStringFlags, BoolOp, Int, IpyEscapeKind, Operator, StringFlags, UnaryOp}; +use ruff_python_ast::{BoolOp, Operator, UnaryOp}; -/// The set of tokens the Python source code can be tokenized in. -#[derive(Clone, Debug, PartialEq, is_macro::Is)] -pub enum Tok { - /// Token value for a name, commonly known as an identifier. - Name { - /// The name value. - /// - /// Unicode names are NFKC-normalized by the lexer, - /// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers) - name: Box, - }, - /// Token value for an integer. - Int { - /// The integer value. - value: Int, - }, - /// Token value for a floating point number. - Float { - /// The float value. - value: f64, - }, - /// Token value for a complex number. - Complex { - /// The real part of the complex number. - real: f64, - /// The imaginary part of the complex number. - imag: f64, - }, - /// Token value for a string. - String { - /// The string value. - value: Box, - /// Flags that can be queried to determine the quote style - /// and prefixes of the string - flags: AnyStringFlags, - }, - /// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix - /// and the opening quote(s). - FStringStart(AnyStringFlags), - /// Token value that includes the portion of text inside the f-string that's not - /// part of the expression part and isn't an opening or closing brace. - FStringMiddle { - /// The string value. - value: Box, - /// Flags that can be queried to determine the quote style - /// and prefixes of the string - flags: AnyStringFlags, - }, - /// Token value for the end of an f-string. This includes the closing quote. - FStringEnd, - /// Token value for IPython escape commands. These are recognized by the lexer - /// only when the mode is [`Ipython`]. - /// - /// [`Ipython`]: crate::Mode::Ipython - IpyEscapeCommand { - /// The magic command value. - value: Box, - /// The kind of magic command. - kind: IpyEscapeKind, - }, - /// Token value for a comment. These are filtered out of the token stream prior to parsing. - Comment(Box), - /// Token value for a newline. - Newline, - /// Token value for a newline that is not a logical line break. These are filtered out of - /// the token stream prior to parsing. - NonLogicalNewline, - /// Token value for an indent. - Indent, - /// Token value for a dedent. - Dedent, - EndOfFile, - /// Token value for a question mark `?`. This is only used in [`Ipython`]. 
- /// - /// [`Ipython`]: crate::Mode::Ipython - Question, - /// Token value for a exclamation mark `!`. - Exclamation, - /// Token value for a left parenthesis `(`. - Lpar, - /// Token value for a right parenthesis `)`. - Rpar, - /// Token value for a left square bracket `[`. - Lsqb, - /// Token value for a right square bracket `]`. - Rsqb, - /// Token value for a colon `:`. - Colon, - /// Token value for a comma `,`. - Comma, - /// Token value for a semicolon `;`. - Semi, - /// Token value for plus `+`. - Plus, - /// Token value for minus `-`. - Minus, - /// Token value for star `*`. - Star, - /// Token value for slash `/`. - Slash, - /// Token value for vertical bar `|`. - Vbar, - /// Token value for ampersand `&`. - Amper, - /// Token value for less than `<`. - Less, - /// Token value for greater than `>`. - Greater, - /// Token value for equal `=`. - Equal, - /// Token value for dot `.`. - Dot, - /// Token value for percent `%`. - Percent, - /// Token value for left bracket `{`. - Lbrace, - /// Token value for right bracket `}`. - Rbrace, - /// Token value for double equal `==`. - EqEqual, - /// Token value for not equal `!=`. - NotEqual, - /// Token value for less than or equal `<=`. - LessEqual, - /// Token value for greater than or equal `>=`. - GreaterEqual, - /// Token value for tilde `~`. - Tilde, - /// Token value for caret `^`. - CircumFlex, - /// Token value for left shift `<<`. - LeftShift, - /// Token value for right shift `>>`. - RightShift, - /// Token value for double star `**`. - DoubleStar, - /// Token value for double star equal `**=`. - DoubleStarEqual, - /// Token value for plus equal `+=`. - PlusEqual, - /// Token value for minus equal `-=`. - MinusEqual, - /// Token value for star equal `*=`. - StarEqual, - /// Token value for slash equal `/=`. - SlashEqual, - /// Token value for percent equal `%=`. - PercentEqual, - /// Token value for ampersand equal `&=`. - AmperEqual, - /// Token value for vertical bar equal `|=`. - VbarEqual, - /// Token value for caret equal `^=`. - CircumflexEqual, - /// Token value for left shift equal `<<=`. - LeftShiftEqual, - /// Token value for right shift equal `>>=`. - RightShiftEqual, - /// Token value for double slash `//`. - DoubleSlash, - /// Token value for double slash equal `//=`. - DoubleSlashEqual, - /// Token value for colon equal `:=`. - ColonEqual, - /// Token value for at `@`. - At, - /// Token value for at equal `@=`. - AtEqual, - /// Token value for arrow `->`. - Rarrow, - /// Token value for ellipsis `...`. - Ellipsis, - - // Self documenting. - // Keywords (alphabetically): - False, - None, - True, - - And, - As, - Assert, - Async, - Await, - Break, - Class, - Continue, - Def, - Del, - Elif, - Else, - Except, - Finally, - For, - From, - Global, - If, - Import, - In, - Is, - Lambda, - Nonlocal, - Not, - Or, - Pass, - Raise, - Return, - Try, - While, - Match, - Type, - Case, - With, - Yield, - - Unknown, -} - -impl Tok { - #[inline] - pub fn kind(&self) -> TokenKind { - TokenKind::from_token(self) - } -} - -impl fmt::Display for Tok { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[allow(clippy::enum_glob_use)] - use Tok::*; - match self { - Name { name } => write!(f, "{name}"), - Int { value } => write!(f, "{value}"), - Float { value } => write!(f, "{value}"), - Complex { real, imag } => write!(f, "{real}j{imag}"), - String { value, flags } => { - write!(f, "{}", flags.format_string_contents(value)) - } - FStringStart(_) => f.write_str("FStringStart"), - FStringMiddle { value, .. 
} => f.write_str(value), - FStringEnd => f.write_str("FStringEnd"), - IpyEscapeCommand { kind, value } => write!(f, "{kind}{value}"), - Newline => f.write_str("Newline"), - NonLogicalNewline => f.write_str("NonLogicalNewline"), - Indent => f.write_str("Indent"), - Dedent => f.write_str("Dedent"), - EndOfFile => f.write_str("EOF"), - Question => f.write_str("?"), - Exclamation => f.write_str("!"), - Lpar => f.write_str("("), - Rpar => f.write_str(")"), - Lsqb => f.write_str("["), - Rsqb => f.write_str("]"), - Colon => f.write_str(":"), - Comma => f.write_str(","), - Comment(value) => f.write_str(value), - Semi => f.write_str(";"), - Plus => f.write_str("+"), - Minus => f.write_str("-"), - Star => f.write_str("*"), - Slash => f.write_str("/"), - Vbar => f.write_str("|"), - Amper => f.write_str("&"), - Less => f.write_str("<"), - Greater => f.write_str(">"), - Equal => f.write_str("="), - Dot => f.write_str("."), - Percent => f.write_str("%"), - Lbrace => f.write_str("{"), - Rbrace => f.write_str("}"), - EqEqual => f.write_str("=="), - NotEqual => f.write_str("!="), - LessEqual => f.write_str("<="), - GreaterEqual => f.write_str(">="), - Tilde => f.write_str("~"), - CircumFlex => f.write_str("^"), - LeftShift => f.write_str("<<"), - RightShift => f.write_str(">>"), - DoubleStar => f.write_str("**"), - DoubleStarEqual => f.write_str("**="), - PlusEqual => f.write_str("+="), - MinusEqual => f.write_str("-="), - StarEqual => f.write_str("*="), - SlashEqual => f.write_str("/="), - PercentEqual => f.write_str("%="), - AmperEqual => f.write_str("&="), - VbarEqual => f.write_str("|="), - CircumflexEqual => f.write_str("^="), - LeftShiftEqual => f.write_str("<<="), - RightShiftEqual => f.write_str(">>="), - DoubleSlash => f.write_str("//"), - DoubleSlashEqual => f.write_str("//="), - At => f.write_str("@"), - AtEqual => f.write_str("@="), - Rarrow => f.write_str("->"), - Ellipsis => f.write_str("..."), - False => f.write_str("False"), - None => f.write_str("None"), - True => f.write_str("True"), - And => f.write_str("and"), - As => f.write_str("as"), - Assert => f.write_str("assert"), - Async => f.write_str("async"), - Await => f.write_str("await"), - Break => f.write_str("break"), - Class => f.write_str("class"), - Continue => f.write_str("continue"), - Def => f.write_str("def"), - Del => f.write_str("del"), - Elif => f.write_str("elif"), - Else => f.write_str("else"), - Except => f.write_str("except"), - Finally => f.write_str("finally"), - For => f.write_str("for"), - From => f.write_str("from"), - Global => f.write_str("global"), - If => f.write_str("if"), - Import => f.write_str("import"), - In => f.write_str("in"), - Is => f.write_str("is"), - Lambda => f.write_str("lambda"), - Nonlocal => f.write_str("nonlocal"), - Not => f.write_str("not"), - Or => f.write_str("or"), - Pass => f.write_str("pass"), - Raise => f.write_str("raise"), - Return => f.write_str("return"), - Try => f.write_str("try"), - While => f.write_str("while"), - Match => f.write_str("match"), - Type => f.write_str("type"), - Case => f.write_str("case"), - With => f.write_str("with"), - Yield => f.write_str("yield"), - ColonEqual => f.write_str(":="), - Unknown => f.write_str(">"), - } - } -} - -/// A kind of token. -/// -/// This is a lightweight representation of [`Tok`] which doesn't contain any information -/// about the token itself. +/// A kind of a token. #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] pub enum TokenKind { - /// Token value for a name, commonly known as an identifier. 
+ /// Token kind for a name, commonly known as an identifier. Name, - /// Token value for an integer. + /// Token kind for an integer. Int, - /// Token value for a floating point number. + /// Token kind for a floating point number. Float, - /// Token value for a complex number. + /// Token kind for a complex number. Complex, - /// Token value for a string. + /// Token kind for a string. String, - /// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix + /// Token kind for the start of an f-string. This includes the `f`/`F`/`fr` prefix /// and the opening quote(s). FStringStart, - /// Token value that includes the portion of text inside the f-string that's not + /// Token kind that includes the portion of text inside the f-string that's not /// part of the expression part and isn't an opening or closing brace. FStringMiddle, - /// Token value for the end of an f-string. This includes the closing quote. + /// Token kind for the end of an f-string. This includes the closing quote. FStringEnd, - /// Token value for a IPython escape command. + /// Token kind for a IPython escape command. IpyEscapeCommand, - /// Token value for a comment. These are filtered out of the token stream prior to parsing. + /// Token kind for a comment. These are filtered out of the token stream prior to parsing. Comment, - /// Token value for a newline. + /// Token kind for a newline. Newline, - /// Token value for a newline that is not a logical line break. These are filtered out of + /// Token kind for a newline that is not a logical line break. These are filtered out of /// the token stream prior to parsing. NonLogicalNewline, - /// Token value for an indent. + /// Token kind for an indent. Indent, - /// Token value for a dedent. + /// Token kind for a dedent. Dedent, EndOfFile, - /// Token value for a question mark `?`. + /// Token kind for a question mark `?`. Question, - /// Token value for an exclamation mark `!`. + /// Token kind for an exclamation mark `!`. Exclamation, - /// Token value for a left parenthesis `(`. + /// Token kind for a left parenthesis `(`. Lpar, - /// Token value for a right parenthesis `)`. + /// Token kind for a right parenthesis `)`. Rpar, - /// Token value for a left square bracket `[`. + /// Token kind for a left square bracket `[`. Lsqb, - /// Token value for a right square bracket `]`. + /// Token kind for a right square bracket `]`. Rsqb, - /// Token value for a colon `:`. + /// Token kind for a colon `:`. Colon, - /// Token value for a comma `,`. + /// Token kind for a comma `,`. Comma, - /// Token value for a semicolon `;`. + /// Token kind for a semicolon `;`. Semi, - /// Token value for plus `+`. + /// Token kind for plus `+`. Plus, - /// Token value for minus `-`. + /// Token kind for minus `-`. Minus, - /// Token value for star `*`. + /// Token kind for star `*`. Star, - /// Token value for slash `/`. + /// Token kind for slash `/`. Slash, - /// Token value for vertical bar `|`. + /// Token kind for vertical bar `|`. Vbar, - /// Token value for ampersand `&`. + /// Token kind for ampersand `&`. Amper, - /// Token value for less than `<`. + /// Token kind for less than `<`. Less, - /// Token value for greater than `>`. + /// Token kind for greater than `>`. Greater, - /// Token value for equal `=`. + /// Token kind for equal `=`. Equal, - /// Token value for dot `.`. + /// Token kind for dot `.`. Dot, - /// Token value for percent `%`. + /// Token kind for percent `%`. Percent, - /// Token value for left bracket `{`. 
+ /// Token kind for left bracket `{`. Lbrace, - /// Token value for right bracket `}`. + /// Token kind for right bracket `}`. Rbrace, - /// Token value for double equal `==`. + /// Token kind for double equal `==`. EqEqual, - /// Token value for not equal `!=`. + /// Token kind for not equal `!=`. NotEqual, - /// Token value for less than or equal `<=`. + /// Token kind for less than or equal `<=`. LessEqual, - /// Token value for greater than or equal `>=`. + /// Token kind for greater than or equal `>=`. GreaterEqual, - /// Token value for tilde `~`. + /// Token kind for tilde `~`. Tilde, - /// Token value for caret `^`. + /// Token kind for caret `^`. CircumFlex, - /// Token value for left shift `<<`. + /// Token kind for left shift `<<`. LeftShift, - /// Token value for right shift `>>`. + /// Token kind for right shift `>>`. RightShift, - /// Token value for double star `**`. + /// Token kind for double star `**`. DoubleStar, - /// Token value for double star equal `**=`. + /// Token kind for double star equal `**=`. DoubleStarEqual, - /// Token value for plus equal `+=`. + /// Token kind for plus equal `+=`. PlusEqual, - /// Token value for minus equal `-=`. + /// Token kind for minus equal `-=`. MinusEqual, - /// Token value for star equal `*=`. + /// Token kind for star equal `*=`. StarEqual, - /// Token value for slash equal `/=`. + /// Token kind for slash equal `/=`. SlashEqual, - /// Token value for percent equal `%=`. + /// Token kind for percent equal `%=`. PercentEqual, - /// Token value for ampersand equal `&=`. + /// Token kind for ampersand equal `&=`. AmperEqual, - /// Token value for vertical bar equal `|=`. + /// Token kind for vertical bar equal `|=`. VbarEqual, - /// Token value for caret equal `^=`. + /// Token kind for caret equal `^=`. CircumflexEqual, - /// Token value for left shift equal `<<=`. + /// Token kind for left shift equal `<<=`. LeftShiftEqual, - /// Token value for right shift equal `>>=`. + /// Token kind for right shift equal `>>=`. RightShiftEqual, - /// Token value for double slash `//`. + /// Token kind for double slash `//`. DoubleSlash, - /// Token value for double slash equal `//=`. + /// Token kind for double slash equal `//=`. DoubleSlashEqual, - /// Token value for colon equal `:=`. + /// Token kind for colon equal `:=`. ColonEqual, - /// Token value for at `@`. + /// Token kind for at `@`. At, - /// Token value for at equal `@=`. + /// Token kind for at equal `@=`. AtEqual, - /// Token value for arrow `->`. + /// Token kind for arrow `->`. Rarrow, - /// Token value for ellipsis `...`. + /// Token kind for ellipsis `...`. Ellipsis, // The keywords should be sorted in alphabetical order. If the boundary tokens for the @@ -534,6 +192,11 @@ pub enum TokenKind { } impl TokenKind { + #[inline] + pub const fn is_eof(self) -> bool { + matches!(self, TokenKind::EndOfFile) + } + #[inline] pub const fn is_newline(self) -> bool { matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline) @@ -541,7 +204,10 @@ impl TokenKind { /// Returns `true` if the token is a keyword (including soft keywords). /// - /// See also [`TokenKind::is_soft_keyword`], [`TokenKind::is_non_soft_keyword`]. + /// See also [`is_soft_keyword`], [`is_non_soft_keyword`]. 
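The `is_keyword`/`is_soft_keyword` predicates referenced here classify a token with two comparisons rather than a long `matches!`, relying on derived `PartialOrd` following declaration order. A standalone sketch of the same trick, with an illustrative enum rather than the real `TokenKind`:

```rust
// Illustrative only: `K` mimics how a fieldless token-kind enum can classify
// keywords by range. Deriving `PartialOrd`/`Ord` on a C-like enum orders
// variants by declaration order, so the checks are valid as long as the
// keyword variants form one contiguous, correctly bounded run.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum K {
    Name,
    Int,
    // Non-soft keywords, declared contiguously: `And..=Yield`.
    And,
    If,
    While,
    Yield,
    // Soft keywords, also contiguous: `Case..=Type`.
    Case,
    Match,
    Type,
}

impl K {
    /// Any keyword, soft or not.
    fn is_keyword(self) -> bool {
        K::And <= self && self <= K::Type
    }

    /// Strictly a soft keyword.
    fn is_soft_keyword(self) -> bool {
        K::Case <= self && self <= K::Type
    }
}

fn main() {
    assert!(K::While.is_keyword());
    assert!(K::Match.is_soft_keyword());
    assert!(!K::Name.is_keyword());
}
```

The trade-off is that the boundary variants are load-bearing: reordering the enum silently breaks the predicates, which is why the real enum documents that its keywords must stay sorted and its boundaries kept in sync.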
+ /// + /// [`is_soft_keyword`]: TokenKind::is_soft_keyword + /// [`is_non_soft_keyword`]: TokenKind::is_non_soft_keyword #[inline] pub fn is_keyword(self) -> bool { TokenKind::And <= self && self <= TokenKind::Type @@ -549,7 +215,10 @@ impl TokenKind { /// Returns `true` if the token is strictly a soft keyword. /// - /// See also [`TokenKind::is_keyword`], [`TokenKind::is_non_soft_keyword`]. + /// See also [`is_keyword`], [`is_non_soft_keyword`]. + /// + /// [`is_keyword`]: TokenKind::is_keyword + /// [`is_non_soft_keyword`]: TokenKind::is_non_soft_keyword #[inline] pub fn is_soft_keyword(self) -> bool { TokenKind::Case <= self && self <= TokenKind::Type @@ -557,7 +226,10 @@ impl TokenKind { /// Returns `true` if the token is strictly a non-soft keyword. /// - /// See also [`TokenKind::is_keyword`], [`TokenKind::is_soft_keyword`]. + /// See also [`is_keyword`], [`is_soft_keyword`]. + /// + /// [`is_keyword`]: TokenKind::is_keyword + /// [`is_soft_keyword`]: TokenKind::is_soft_keyword #[inline] pub fn is_non_soft_keyword(self) -> bool { TokenKind::And <= self && self <= TokenKind::Yield @@ -677,10 +349,12 @@ impl TokenKind { matches!(self, TokenKind::Plus | TokenKind::Minus) } - /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is an arithmetic unary + /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary arithmetic /// operator, otherwise return [None]. /// - /// Use [`TokenKind::as_unary_operator`] to match against any unary operator. + /// Use [`as_unary_operator`] to match against any unary operator. + /// + /// [`as_unary_operator`]: TokenKind::as_unary_operator #[inline] pub(crate) const fn as_unary_arithmetic_operator(self) -> Option { Some(match self { @@ -693,8 +367,9 @@ impl TokenKind { /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary operator, /// otherwise return [None]. /// - /// Use [`TokenKind::as_unary_arithmetic_operator`] to match against only an arithmetic unary - /// operator. + /// Use [`as_unary_arithmetic_operator`] to match against only an arithmetic unary operator. + /// + /// [`as_unary_arithmetic_operator`]: TokenKind::as_unary_arithmetic_operator #[inline] pub(crate) const fn as_unary_operator(self) -> Option { Some(match self { @@ -720,8 +395,9 @@ impl TokenKind { /// Returns the binary [`Operator`] that corresponds to the current token, if it's a binary /// operator, otherwise return [None]. /// - /// Use [`TokenKind::as_augmented_assign_operator`] to match against an augmented assignment - /// token. + /// Use [`as_augmented_assign_operator`] to match against an augmented assignment token. + /// + /// [`as_augmented_assign_operator`]: TokenKind::as_augmented_assign_operator pub(crate) const fn as_binary_operator(self) -> Option { Some(match self { TokenKind::Plus => Operator::Add, @@ -762,126 +438,6 @@ impl TokenKind { _ => return None, }) } - - pub const fn from_token(token: &Tok) -> Self { - match token { - Tok::Name { .. } => TokenKind::Name, - Tok::Int { .. } => TokenKind::Int, - Tok::Float { .. } => TokenKind::Float, - Tok::Complex { .. } => TokenKind::Complex, - Tok::String { .. } => TokenKind::String, - Tok::FStringStart(_) => TokenKind::FStringStart, - Tok::FStringMiddle { .. } => TokenKind::FStringMiddle, - Tok::FStringEnd => TokenKind::FStringEnd, - Tok::IpyEscapeCommand { .. 
} => TokenKind::IpyEscapeCommand, - Tok::Comment(_) => TokenKind::Comment, - Tok::Newline => TokenKind::Newline, - Tok::NonLogicalNewline => TokenKind::NonLogicalNewline, - Tok::Indent => TokenKind::Indent, - Tok::Dedent => TokenKind::Dedent, - Tok::EndOfFile => TokenKind::EndOfFile, - Tok::Question => TokenKind::Question, - Tok::Exclamation => TokenKind::Exclamation, - Tok::Lpar => TokenKind::Lpar, - Tok::Rpar => TokenKind::Rpar, - Tok::Lsqb => TokenKind::Lsqb, - Tok::Rsqb => TokenKind::Rsqb, - Tok::Colon => TokenKind::Colon, - Tok::Comma => TokenKind::Comma, - Tok::Semi => TokenKind::Semi, - Tok::Plus => TokenKind::Plus, - Tok::Minus => TokenKind::Minus, - Tok::Star => TokenKind::Star, - Tok::Slash => TokenKind::Slash, - Tok::Vbar => TokenKind::Vbar, - Tok::Amper => TokenKind::Amper, - Tok::Less => TokenKind::Less, - Tok::Greater => TokenKind::Greater, - Tok::Equal => TokenKind::Equal, - Tok::Dot => TokenKind::Dot, - Tok::Percent => TokenKind::Percent, - Tok::Lbrace => TokenKind::Lbrace, - Tok::Rbrace => TokenKind::Rbrace, - Tok::EqEqual => TokenKind::EqEqual, - Tok::NotEqual => TokenKind::NotEqual, - Tok::LessEqual => TokenKind::LessEqual, - Tok::GreaterEqual => TokenKind::GreaterEqual, - Tok::Tilde => TokenKind::Tilde, - Tok::CircumFlex => TokenKind::CircumFlex, - Tok::LeftShift => TokenKind::LeftShift, - Tok::RightShift => TokenKind::RightShift, - Tok::DoubleStar => TokenKind::DoubleStar, - Tok::DoubleStarEqual => TokenKind::DoubleStarEqual, - Tok::PlusEqual => TokenKind::PlusEqual, - Tok::MinusEqual => TokenKind::MinusEqual, - Tok::StarEqual => TokenKind::StarEqual, - Tok::SlashEqual => TokenKind::SlashEqual, - Tok::PercentEqual => TokenKind::PercentEqual, - Tok::AmperEqual => TokenKind::AmperEqual, - Tok::VbarEqual => TokenKind::VbarEqual, - Tok::CircumflexEqual => TokenKind::CircumflexEqual, - Tok::LeftShiftEqual => TokenKind::LeftShiftEqual, - Tok::RightShiftEqual => TokenKind::RightShiftEqual, - Tok::DoubleSlash => TokenKind::DoubleSlash, - Tok::DoubleSlashEqual => TokenKind::DoubleSlashEqual, - Tok::ColonEqual => TokenKind::ColonEqual, - Tok::At => TokenKind::At, - Tok::AtEqual => TokenKind::AtEqual, - Tok::Rarrow => TokenKind::Rarrow, - Tok::Ellipsis => TokenKind::Ellipsis, - Tok::False => TokenKind::False, - Tok::None => TokenKind::None, - Tok::True => TokenKind::True, - Tok::And => TokenKind::And, - Tok::As => TokenKind::As, - Tok::Assert => TokenKind::Assert, - Tok::Async => TokenKind::Async, - Tok::Await => TokenKind::Await, - Tok::Break => TokenKind::Break, - Tok::Class => TokenKind::Class, - Tok::Continue => TokenKind::Continue, - Tok::Def => TokenKind::Def, - Tok::Del => TokenKind::Del, - Tok::Elif => TokenKind::Elif, - Tok::Else => TokenKind::Else, - Tok::Except => TokenKind::Except, - Tok::Finally => TokenKind::Finally, - Tok::For => TokenKind::For, - Tok::From => TokenKind::From, - Tok::Global => TokenKind::Global, - Tok::If => TokenKind::If, - Tok::Import => TokenKind::Import, - Tok::In => TokenKind::In, - Tok::Is => TokenKind::Is, - Tok::Lambda => TokenKind::Lambda, - Tok::Nonlocal => TokenKind::Nonlocal, - Tok::Not => TokenKind::Not, - Tok::Or => TokenKind::Or, - Tok::Pass => TokenKind::Pass, - Tok::Raise => TokenKind::Raise, - Tok::Return => TokenKind::Return, - Tok::Try => TokenKind::Try, - Tok::While => TokenKind::While, - Tok::Match => TokenKind::Match, - Tok::Case => TokenKind::Case, - Tok::Type => TokenKind::Type, - Tok::With => TokenKind::With, - Tok::Yield => TokenKind::Yield, - Tok::Unknown => TokenKind::Unknown, - } - } -} - -impl From<&Tok> for TokenKind 
{ - fn from(value: &Tok) -> Self { - Self::from_token(value) - } -} - -impl From<Tok> for TokenKind { - fn from(value: Tok) -> Self { - Self::from_token(&value) - } } impl From for TokenKind { @@ -1041,10 +597,8 @@ impl fmt::Display for TokenKind { #[cfg(target_pointer_width = "64")] mod sizes { use crate::lexer::{LexicalError, LexicalErrorType}; - use crate::Tok; use static_assertions::assert_eq_size; - assert_eq_size!(Tok, [u8; 24]); assert_eq_size!(LexicalErrorType, [u8; 24]); - assert_eq_size!(Result<Tok, LexicalError>, [u8; 32]); + assert_eq_size!(LexicalError, [u8; 32]); } diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index 1b48b143cfd56..005c5ff38d6b4 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -1,115 +1,189 @@ -use std::iter::FusedIterator; - +use ruff_python_trivia::CommentRanges; use ruff_text_size::{TextRange, TextSize}; -use crate::lexer::{LexResult, LexicalError, Spanned}; -use crate::{Tok, TokenKind}; +use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue}; +use crate::{Mode, TokenKind}; + +/// Token source for the parser that skips over any trivia tokens. +#[derive(Debug)] +pub(crate) struct TokenSource<'src> { + /// The underlying source for the tokens. + lexer: Lexer<'src>, + + /// A vector containing all the tokens emitted by the lexer. This is returned when the parser + /// is finished consuming all the tokens. Note that unlike the emitted tokens, this vector + /// holds both the trivia and non-trivia tokens. + tokens: Vec<Token>, -#[derive(Clone, Debug)] -pub(crate) struct TokenSource { - tokens: std::vec::IntoIter<LexResult>, - errors: Vec<LexicalError>, + /// A vector containing the range of all the comment tokens emitted by the lexer. + comments: Vec<TextRange>, } -impl TokenSource { - pub(crate) fn new(tokens: Vec<LexResult>) -> Self { - Self { - tokens: tokens.into_iter(), - errors: Vec::new(), +impl<'src> TokenSource<'src> { + /// Create a new token source for the given lexer. + pub(crate) fn new(lexer: Lexer<'src>) -> Self { + // TODO(dhruvmanila): Use `allocate_tokens_vec` + TokenSource { + lexer, + tokens: vec![], + comments: vec![], } } - /// Returns the position of the current token. - /// - /// This is the position before any whitespace or comments. - pub(crate) fn position(&self) -> Option<TextSize> { - let first = self.tokens.as_slice().first()?; + /// Create a new token source from the given source code which starts at the given offset. + pub(crate) fn from_source(source: &'src str, mode: Mode, start_offset: TextSize) -> Self { + let lexer = Lexer::new(source, mode, start_offset); + let mut source = TokenSource::new(lexer); - let range = match first { - Ok((_, range)) => *range, - Err(error) => error.location(), - }; + // Initialize the token source so that the current token is set correctly. + source.do_bump(); + source + } - Some(range.start()) + /// Returns the kind of the current token. + pub(crate) fn current_kind(&self) -> TokenKind { + self.lexer.current_kind() } - /// Returns the end of the last token - pub(crate) fn end(&self) -> Option<TextSize> { - let last = self.tokens.as_slice().last()?; + /// Returns the range of the current token. + pub(crate) fn current_range(&self) -> TextRange { + self.lexer.current_range() + } - let range = match last { - Ok((_, range)) => *range, - Err(error) => error.location(), - }; + /// Returns the flags for the current token.
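The new `TokenSource` above only ever hands the parser non-trivia tokens, while still recording every token it lexes plus the range of each comment. A minimal, self-contained sketch of that skip-but-record pattern follows; the `Kind` and `SkippingSource` names are invented here for illustration and are not the crate's actual types:

```
// Stand-in token kind; the real crate uses `TokenKind`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind {
    Comment,
    NonLogicalNewline,
    Name,
    Number,
}

fn is_trivia(kind: Kind) -> bool {
    matches!(kind, Kind::Comment | Kind::NonLogicalNewline)
}

/// Records every token (trivia included) and every comment offset,
/// but only hands non-trivia tokens to the caller.
struct SkippingSource<I: Iterator<Item = (Kind, usize)>> {
    inner: I,
    all_tokens: Vec<(Kind, usize)>,
    comment_offsets: Vec<usize>,
}

impl<I: Iterator<Item = (Kind, usize)>> SkippingSource<I> {
    fn next_non_trivia(&mut self) -> Option<(Kind, usize)> {
        loop {
            let token = self.inner.next()?;
            self.all_tokens.push(token);
            if token.0 == Kind::Comment {
                self.comment_offsets.push(token.1);
            }
            if !is_trivia(token.0) {
                return Some(token);
            }
        }
    }
}

fn main() {
    let tokens = vec![(Kind::Name, 0), (Kind::Comment, 2), (Kind::Number, 10)];
    let mut source = SkippingSource {
        inner: tokens.into_iter(),
        all_tokens: Vec::new(),
        comment_offsets: Vec::new(),
    };
    assert_eq!(source.next_non_trivia(), Some((Kind::Name, 0)));
    // The comment at offset 2 is skipped but remembered.
    assert_eq!(source.next_non_trivia(), Some((Kind::Number, 10)));
    assert_eq!(source.comment_offsets, vec![2]);
    assert_eq!(source.all_tokens.len(), 3);
}
```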
+ pub(crate) fn current_flags(&self) -> TokenFlags { + self.lexer.current_flags() + } - Some(range.end()) + /// Calls the underlying [`take_value`] method on the lexer. Refer to its documentation + /// for more info. + /// + /// [`take_value`]: Lexer::take_value + pub(crate) fn take_value(&mut self) -> TokenValue { + self.lexer.take_value() } - /// Returns the next token kind and its range without consuming it. - pub(crate) fn peek(&self) -> Option<(TokenKind, TextRange)> { - let mut iter = self.tokens.as_slice().iter(); + /// Returns the next non-trivia token without consuming it. + /// + /// Use [`peek2`] to get the next two tokens. + /// + /// [`peek2`]: TokenSource::peek2 + pub(crate) fn peek(&mut self) -> TokenKind { + let checkpoint = self.lexer.checkpoint(); + let next = self.next_non_trivia_token(); + self.lexer.rewind(checkpoint); + next + } + /// Returns the next two non-trivia tokens without consuming them. + /// + /// Use [`peek`] to get only the next token. + /// + /// [`peek`]: TokenSource::peek + pub(crate) fn peek2(&mut self) -> (TokenKind, TokenKind) { + let checkpoint = self.lexer.checkpoint(); + let first = self.next_non_trivia_token(); + let second = self.next_non_trivia_token(); + self.lexer.rewind(checkpoint); + (first, second) + } + + /// Bumps the token source to the next non-trivia token. + /// + /// It pushes the given kind to the token vector with the current token range. + pub(crate) fn bump(&mut self, kind: TokenKind) { + self.tokens + .push(Token::new(kind, self.current_range(), self.current_flags())); + self.do_bump(); + } + + /// Bumps the token source to the next non-trivia token without adding the current token to the + /// token vector. It does add the trivia tokens to the token vector. + fn do_bump(&mut self) { loop { - let next = iter.next()?; + let kind = self.lexer.next_token(); + if is_trivia(kind) { + if kind == TokenKind::Comment { + self.comments.push(self.current_range()); + } + self.tokens + .push(Token::new(kind, self.current_range(), self.current_flags())); + continue; + } + break; + } + } - if next.as_ref().is_ok_and(is_trivia) { + /// Returns the next non-trivia token without adding it to the token vector. + fn next_non_trivia_token(&mut self) -> TokenKind { + loop { + let kind = self.lexer.next_token(); + if is_trivia(kind) { continue; } + break kind; + } + } - break Some(match next { - Ok((token, range)) => (TokenKind::from_token(token), *range), - Err(error) => (TokenKind::Unknown, error.location()), - }); + /// Creates a checkpoint to which the token source can later return using [`Self::rewind`]. + pub(crate) fn checkpoint(&self) -> TokenSourceCheckpoint<'src> { + TokenSourceCheckpoint { + lexer_checkpoint: self.lexer.checkpoint(), + tokens_position: self.tokens.len(), + comments_position: self.comments.len(), } } - pub(crate) fn finish(self) -> Vec<LexicalError> { + /// Restores the token source to the given checkpoint. + pub(crate) fn rewind(&mut self, checkpoint: TokenSourceCheckpoint<'src>) { + let TokenSourceCheckpoint { + lexer_checkpoint, + tokens_position, + comments_position, + } = checkpoint; + + self.lexer.rewind(lexer_checkpoint); + self.tokens.truncate(tokens_position); + self.comments.truncate(comments_position); + } + + /// Consumes the token source, returning the collected tokens, comment ranges, and any errors + /// encountered during lexing. The token collection includes both the trivia and non-trivia + /// tokens.
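Lookahead in this design is built entirely from the checkpoint/rewind pair: `peek` and `peek2` save the lexer state, advance past trivia, and then restore the saved state, so peeking never moves the parser forward. A self-contained sketch of the same save/advance/restore shape, with invented `Cursor` and `Checkpoint` types standing in for the real lexer:

```
struct Cursor<'a> {
    items: &'a [u32],
    pos: usize,
}

#[derive(Clone, Copy)]
struct Checkpoint(usize);

impl<'a> Cursor<'a> {
    fn checkpoint(&self) -> Checkpoint {
        Checkpoint(self.pos)
    }

    fn rewind(&mut self, checkpoint: Checkpoint) {
        self.pos = checkpoint.0;
    }

    fn bump(&mut self) -> Option<u32> {
        let item = self.items.get(self.pos).copied()?;
        self.pos += 1;
        Some(item)
    }

    /// Two-item lookahead: save the position, advance twice, restore.
    fn peek2(&mut self) -> (Option<u32>, Option<u32>) {
        let checkpoint = self.checkpoint();
        let first = self.bump();
        let second = self.bump();
        self.rewind(checkpoint);
        (first, second)
    }
}

fn main() {
    let mut cursor = Cursor { items: &[1, 2, 3], pos: 0 };
    assert_eq!(cursor.peek2(), (Some(1), Some(2)));
    // Peeking did not consume anything: the next bump still yields 1.
    assert_eq!(cursor.bump(), Some(1));
}
```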
+ pub(crate) fn finish(mut self) -> (Vec<Token>, CommentRanges, Vec<LexicalError>) { assert_eq!( - self.tokens.as_slice(), - &[], - "TokenSource was not fully consumed." + self.current_kind(), + TokenKind::EndOfFile, + "TokenSource was not fully consumed" ); - self.errors - } -} + // The `EndOfFile` token shouldn't be included in the token stream; it's mainly to signal + // the parser to stop. This isn't in `do_bump` because it only needs to be done once. + if let Some(last) = self.tokens.pop() { + assert_eq!(last.kind(), TokenKind::EndOfFile); + } -impl FromIterator<LexResult> for TokenSource { - #[inline] - fn from_iter<T: IntoIterator<Item = LexResult>>(iter: T) -> Self { - Self::new(Vec::from_iter(iter)) + let comment_ranges = CommentRanges::new(self.comments); + (self.tokens, comment_ranges, self.lexer.finish()) } } -impl Iterator for TokenSource { - type Item = Spanned; - - #[inline] - fn next(&mut self) -> Option<Self::Item> { - loop { - let next = self.tokens.next()?; - - match next { - Ok(token) => { - if is_trivia(&token) { - continue; - } - - break Some(token); - } - - Err(error) => { - let location = error.location(); - self.errors.push(error); - break Some((Tok::Unknown, location)); - } - } - } - } +pub(crate) struct TokenSourceCheckpoint<'src> { + lexer_checkpoint: LexerCheckpoint<'src>, + tokens_position: usize, + comments_position: usize, } -impl FusedIterator for TokenSource {} +/// Allocates a [`Vec`] with an approximated capacity to fit all tokens +/// of `contents`. +/// +/// See [#9546](https://github.com/astral-sh/ruff/pull/9546) for a more detailed explanation. +#[allow(dead_code)] +fn allocate_tokens_vec(contents: &str) -> Vec<Token> { + let lower_bound = contents.len().saturating_mul(15) / 100; + Vec::with_capacity(lower_bound) +} -const fn is_trivia(result: &Spanned) -> bool { - matches!(result, (Tok::Comment(_) | Tok::NonLogicalNewline, _)) +fn is_trivia(token: TokenKind) -> bool { + matches!(token, TokenKind::Comment | TokenKind::NonLogicalNewline) } diff --git a/crates/ruff_python_parser/src/typing.rs b/crates/ruff_python_parser/src/typing.rs index c8d82304e90ca..02ebf3243c0b3 100644 --- a/crates/ruff_python_parser/src/typing.rs +++ b/crates/ruff_python_parser/src/typing.rs @@ -6,7 +6,7 @@ use ruff_python_ast::relocate::relocate_expr; use ruff_python_ast::{str, Expr}; use ruff_text_size::{TextLen, TextRange}; -use crate::{parse_expression, parse_expression_starts_at}; +use crate::{parse_expression, parse_expression_range}; #[derive(is_macro::Is, Copy, Clone, Debug)] pub enum AnnotationKind { @@ -22,25 +22,30 @@ pub enum AnnotationKind { Complex, } -/// Parse a type annotation from a string. +/// Parses the value of a string literal node (`parsed_contents`) with `range` as a type +/// annotation. The given `source` is the entire source code. pub fn parse_type_annotation( - value: &str, + parsed_contents: &str, range: TextRange, source: &str, ) -> Result<(Expr, AnnotationKind)> { let expression = &source[range]; - if str::raw_contents(expression).is_some_and(|body| body == value) { + if str::raw_contents(expression).is_some_and(|raw_contents| raw_contents == parsed_contents) { // The annotation is considered "simple" if and only if the raw representation (e.g., // `List[int]` within "List[int]") exactly matches the parsed representation. This // isn't the case, e.g., for implicit concatenations, or for annotations that contain // escaped quotes.
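For the "simple" case described above, the new code re-parses the annotation at its true position by shrinking the literal's range: the leading quote's length is added to the start and the trailing quote's length subtracted from the end, after which `parse_expression_range` can run over the original source. A small sketch of that arithmetic using plain byte offsets in place of the crate's `TextRange` (the function and its parameters are invented for illustration):

```
// Given a string literal like `"List[int]"` that starts at `literal_start`
// in the enclosing source, compute the range of the annotation text itself.
fn inner_expression_range(
    literal_start: usize,
    literal: &str,
    leading_quote: &str,
    trailing_quote: &str,
) -> std::ops::Range<usize> {
    let start = literal_start + leading_quote.len();
    let end = literal_start + literal.len() - trailing_quote.len();
    start..end
}

fn main() {
    // In `x: "List[int]"`, the literal starts at byte offset 3.
    let source = r#"x: "List[int]""#;
    let range = inner_expression_range(3, r#""List[int]""#, "\"", "\"");
    // Re-parsing `&source[range]` yields the expression at its real offsets.
    assert_eq!(&source[range], "List[int]");
}
```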
- let leading_quote = str::leading_quote(expression).unwrap(); - let expr = parse_expression_starts_at(value, range.start() + leading_quote.text_len())?; + let leading_quote_len = str::leading_quote(expression).unwrap().text_len(); + let trailing_quote_len = str::trailing_quote(expression).unwrap().text_len(); + let range = range + .add_start(leading_quote_len) + .sub_end(trailing_quote_len); + let expr = parse_expression_range(source, range)?.into_expr(); Ok((expr, AnnotationKind::Simple)) } else { // Otherwise, consider this a "complex" annotation. - let mut expr = parse_expression(value)?; + let mut expr = parse_expression(parsed_contents)?.into_expr(); relocate_expr(&mut expr, range); Ok((expr, AnnotationKind::Complex)) } diff --git a/crates/ruff_python_parser/tests/fixtures.rs b/crates/ruff_python_parser/tests/fixtures.rs index 58896d6912b83..2a3dce311ae62 100644 --- a/crates/ruff_python_parser/tests/fixtures.rs +++ b/crates/ruff_python_parser/tests/fixtures.rs @@ -8,7 +8,7 @@ use annotate_snippets::snippet::{AnnotationType, Slice, Snippet, SourceAnnotatio use ruff_python_ast::visitor::preorder::{walk_module, PreorderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, Mod}; -use ruff_python_parser::{Mode, ParseErrorType, Program}; +use ruff_python_parser::{parse_unchecked, Mode, ParseErrorType}; use ruff_source_file::{LineIndex, OneIndexed, SourceCode}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -36,15 +36,15 @@ fn inline_err() { /// Snapshots the AST. fn test_valid_syntax(input_path: &Path) { let source = fs::read_to_string(input_path).expect("Expected test file to exist"); - let program = Program::parse_str(&source, Mode::Module); + let parsed = parse_unchecked(&source, Mode::Module); - if !program.is_valid() { + if !parsed.is_valid() { let line_index = LineIndex::from_source_text(&source); let source_code = SourceCode::new(&source, &line_index); let mut message = "Expected no syntax errors for a valid program but the parser generated the following errors:\n".to_string(); - for error in program.errors() { + for error in parsed.errors() { writeln!( &mut message, "{}\n", @@ -60,11 +60,11 @@ fn test_valid_syntax(input_path: &Path) { panic!("{input_path:?}: {message}"); } - validate_ast(program.ast(), source.text_len(), input_path); + validate_ast(parsed.syntax(), source.text_len(), input_path); let mut output = String::new(); writeln!(&mut output, "## AST").unwrap(); - writeln!(&mut output, "\n```\n{:#?}\n```", program.ast()).unwrap(); + writeln!(&mut output, "\n```\n{:#?}\n```", parsed.syntax()).unwrap(); insta::with_settings!({ omit_expression => true, @@ -79,25 +79,25 @@ fn test_valid_syntax(input_path: &Path) { /// Snapshots the AST and the error messages. fn test_invalid_syntax(input_path: &Path) { let source = fs::read_to_string(input_path).expect("Expected test file to exist"); - let program = Program::parse_str(&source, Mode::Module); + let parsed = parse_unchecked(&source, Mode::Module); assert!( - !program.is_valid(), + !parsed.is_valid(), "{input_path:?}: Expected parser to generate at least one syntax error for a program containing syntax errors." 
); - validate_ast(program.ast(), source.text_len(), input_path); + validate_ast(parsed.syntax(), source.text_len(), input_path); let mut output = String::new(); writeln!(&mut output, "## AST").unwrap(); - writeln!(&mut output, "\n```\n{:#?}\n```", program.ast()).unwrap(); + writeln!(&mut output, "\n```\n{:#?}\n```", parsed.syntax()).unwrap(); writeln!(&mut output, "## Errors\n").unwrap(); let line_index = LineIndex::from_source_text(&source); let source_code = SourceCode::new(&source, &line_index); - for error in program.errors() { + for error in parsed.errors() { writeln!( &mut output, "{}\n", @@ -126,20 +126,22 @@ fn test_invalid_syntax(input_path: &Path) { #[allow(clippy::print_stdout)] fn parser_quick_test() { let source = "\ -data[*x,] +def foo() + pass "; - let program = Program::parse_str(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); - println!("AST:\n----\n{:#?}", program.ast()); + println!("AST:\n----\n{:#?}", parsed.syntax()); + println!("Tokens:\n-------\n{:#?}", parsed.tokens()); - if !program.is_valid() { + if !parsed.is_valid() { println!("Errors:\n-------"); let line_index = LineIndex::from_source_text(source); let source_code = SourceCode::new(source, &line_index); - for error in program.errors() { + for error in parsed.errors() { // Sometimes the code frame doesn't show the error message, so we print // the message as well. println!("Syntax Error: {error}"); diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@ann_assign_stmt_type_alias_annotation.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@ann_assign_stmt_type_alias_annotation.py.snap index 3ced503debc87..ef88c92d751ae 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@ann_assign_stmt_type_alias_annotation.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@ann_assign_stmt_type_alias_annotation.py.snap @@ -96,13 +96,6 @@ Module( ``` ## Errors - | -1 | a: type X = int - | ^^^^ Syntax Error: Expected an identifier, but found a keyword 'type' that cannot be used here -2 | lambda: type X = int - | - - | 1 | a: type X = int | ^ Syntax Error: Simple statements must be separated by newlines or semicolons @@ -110,13 +103,6 @@ Module( | - | -1 | a: type X = int -2 | lambda: type X = int - | ^^^^ Syntax Error: Expected an identifier, but found a keyword 'type' that cannot be used here - | - - | 1 | a: type X = int 2 | lambda: type X = int diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@async_unexpected_token.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@async_unexpected_token.py.snap index d64d49c708fd7..37154ac1eedae 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@async_unexpected_token.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@async_unexpected_token.py.snap @@ -7,7 +7,7 @@ input_file: crates/ruff_python_parser/resources/inline/err/async_unexpected_toke ``` Module( ModModule { - range: 0..220, + range: 0..116, body: [ ClassDef( StmtClassDef { @@ -113,56 +113,41 @@ Module( ], }, ), - Expr( - StmtExpr { - range: 192..197, - value: Name( + Match( + StmtMatch { + range: 88..115, + subject: Name( ExprName { - range: 192..197, - id: "match", - ctx: Load, - }, - ), - }, - ), - AnnAssign( - StmtAnnAssign { - range: 198..203, - target: Name( - ExprName { - range: 198..202, + range: 94..98, id: "test", - ctx: Store, - }, - ), - annotation: Name( - ExprName { - range: 203..203, - id: "", - ctx: Invalid, - }, - ), - value: None, - simple: 
true, - }, - ), - AnnAssign( - StmtAnnAssign { - range: 213..219, - target: Name( - ExprName { - range: 213..214, - id: "_", - ctx: Store, + ctx: Load, }, ), - annotation: EllipsisLiteral( - ExprEllipsisLiteral { - range: 216..219, + cases: [ + MatchCase { + range: 104..115, + pattern: MatchAs( + PatternMatchAs { + range: 109..110, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 112..115, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 112..115, + }, + ), + }, + ), + ], }, - ), - value: None, - simple: true, + ], }, ), ], @@ -194,7 +179,7 @@ Module( 3 | async x = 1 | ^ Syntax Error: Expected 'def', 'with' or 'for' to follow 'async', found name 4 | async async def foo(): ... -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because +5 | async match test: | @@ -203,55 +188,15 @@ Module( 3 | async x = 1 4 | async async def foo(): ... | ^^^^^ Syntax Error: Expected 'def', 'with' or 'for' to follow 'async', found 'async' -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because -6 | # of the soft keyword # transformer - | - - - | -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because -6 | # of the soft keyword # transformer -7 | async match test: - | ^^^^^ Syntax Error: Expected 'def', 'with' or 'for' to follow 'async', found name -8 | case _: ... - | - - - | -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because -6 | # of the soft keyword # transformer -7 | async match test: - | ^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons -8 | case _: ... - | - - - | -5 | # TODO(dhruvmanila): Here, `match` is actually a Name token because -6 | # of the soft keyword # transformer -7 | async match test: - | ^ Syntax Error: Expected an expression -8 | case _: ... +5 | async match test: +6 | case _: ... | | -6 | # of the soft keyword # transformer -7 | async match test: -8 | case _: ... - | ^^^^ Syntax Error: Unexpected indentation - | - - - | -6 | # of the soft keyword # transformer -7 | async match test: -8 | case _: ... - | ^^^^ Syntax Error: Expected a statement - | - - - | -7 | async match test: -8 | case _: ... +3 | async x = 1 +4 | async async def foo(): ... +5 | async match test: + | ^^^^^ Syntax Error: Expected 'def', 'with' or 'for' to follow 'async', found 'match' +6 | case _: ... 
| diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword.py.snap new file mode 100644 index 0000000000000..70c3203746c26 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword.py.snap @@ -0,0 +1,66 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..33, + body: [ + Match( + StmtMatch { + range: 0..32, + subject: Yield( + ExprYield { + range: 6..15, + value: Some( + Name( + ExprName { + range: 12..15, + id: "foo", + ctx: Load, + }, + ), + ), + }, + ), + cases: [ + MatchCase { + range: 21..32, + pattern: MatchAs( + PatternMatchAs { + range: 26..27, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 29..32, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 29..32, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | match yield foo: + | ^^^^^^^^^ Syntax Error: Yield expression cannot be used here +2 | case _: ... + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword_or_identifier.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword_or_identifier.py.snap new file mode 100644 index 0000000000000..4f420387e903b --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_classify_as_keyword_or_identifier.py.snap @@ -0,0 +1,65 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/match_classify_as_keyword_or_identifier.py +--- +## AST + +``` +Module( + ModModule { + range: 0..39, + body: [ + Match( + StmtMatch { + range: 0..38, + subject: Starred( + ExprStarred { + range: 6..10, + value: Name( + ExprName { + range: 7..10, + id: "foo", + ctx: Load, + }, + ), + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 27..38, + pattern: MatchAs( + PatternMatchAs { + range: 32..33, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 35..38, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 35..38, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | match *foo: # Keyword + | ^^^^ Syntax Error: Starred expression cannot be used here +2 | case _: ... 
+ | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_expected_colon.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_expected_colon.py.snap new file mode 100644 index 0000000000000..4b4f623995ac8 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_expected_colon.py.snap @@ -0,0 +1,76 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/match_expected_colon.py +--- +## AST + +``` +Module( + ModModule { + range: 0..29, + body: [ + Match( + StmtMatch { + range: 0..28, + subject: List( + ExprList { + range: 6..12, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 7..8, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 10..11, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 17..28, + pattern: MatchAs( + PatternMatchAs { + range: 22..23, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 25..28, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 25..28, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | match [1, 2] + | ^ Syntax Error: Expected ':', found newline +2 | case _: ... + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_missing_pattern.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_missing_pattern.py.snap index 4736985e44469..882bb79838791 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_missing_pattern.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_missing_pattern.py.snap @@ -7,38 +7,48 @@ input_file: crates/ruff_python_parser/resources/inline/err/match_stmt_missing_pa ``` Module( ModModule { - range: 0..110, + range: 0..24, body: [ Match( StmtMatch { - range: 86..99, + range: 0..23, subject: Name( ExprName { - range: 92..93, + range: 6..7, id: "x", ctx: Load, }, ), - cases: [], - }, - ), - AnnAssign( - StmtAnnAssign { - range: 99..109, - target: Name( - ExprName { - range: 99..103, - id: "case", - ctx: Store, - }, - ), - annotation: EllipsisLiteral( - ExprEllipsisLiteral { - range: 106..109, + cases: [ + MatchCase { + range: 13..23, + pattern: MatchValue( + PatternMatchValue { + range: 17..17, + value: Name( + ExprName { + range: 17..17, + id: "", + ctx: Invalid, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 20..23, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 20..23, + }, + ), + }, + ), + ], }, - ), - value: None, - simple: true, + ], }, ), ], @@ -48,14 +58,7 @@ Module( ## Errors | -1 | # TODO(dhruvmanila): Here, `case` is a name token because of soft keyword transformer -2 | match x: -3 | case : ... - | ^^^^ Syntax Error: Expected `case` block - | - - - | -2 | match x: -3 | case : ... +1 | match x: +2 | case : ... 
+ | ^ Syntax Error: Expected a pattern | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_no_newline_before_case.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_no_newline_before_case.py.snap index 0d11f2c94c928..2e8be2f3068a7 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_no_newline_before_case.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@match_stmt_no_newline_before_case.py.snap @@ -11,7 +11,7 @@ Module( body: [ Match( StmtMatch { - range: 0..10, + range: 0..22, subject: Name( ExprName { range: 6..9, @@ -19,38 +19,31 @@ Module( ctx: Load, }, ), - cases: [], - }, - ), - Expr( - StmtExpr { - range: 11..15, - value: Name( - ExprName { - range: 11..15, - id: "case", - ctx: Load, - }, - ), - }, - ), - AnnAssign( - StmtAnnAssign { - range: 16..22, - target: Name( - ExprName { - range: 16..17, - id: "_", - ctx: Store, + cases: [ + MatchCase { + range: 11..22, + pattern: MatchAs( + PatternMatchAs { + range: 16..17, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 19..22, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 19..22, + }, + ), + }, + ), + ], }, - ), - annotation: EllipsisLiteral( - ExprEllipsisLiteral { - range: 19..22, - }, - ), - value: None, - simple: true, + ], }, ), ], @@ -61,11 +54,10 @@ Module( | 1 | match foo: case _: ... - | ^^^^ Syntax Error: Expected newline, found name + | ^^^^ Syntax Error: Expected newline, found 'case' | | 1 | match foo: case _: ... - | ^ Syntax Error: Simple statements must be separated by newlines or semicolons | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_4.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_4.py.snap index d8e9b3da91f36..3b1a06c49d7a2 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_4.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_4.py.snap @@ -42,14 +42,14 @@ Module( ), ], patterns: [ - MatchValue( - PatternMatchValue { + MatchAs( + PatternMatchAs { range: 164..166, - value: Name( - ExprName { - range: 164..166, + pattern: None, + name: Some( + Identifier { id: "as", - ctx: Load, + range: 164..166, }, ), }, diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@except_stmt_as_name_soft_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@except_stmt_as_name_soft_keyword.py.snap new file mode 100644 index 0000000000000..d9ddd2be7c2d1 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@except_stmt_as_name_soft_keyword.py.snap @@ -0,0 +1,133 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/except_stmt_as_name_soft_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..100, + body: [ + Try( + StmtTry { + range: 0..99, + body: [ + Expr( + StmtExpr { + range: 5..8, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 5..8, + }, + ), + }, + ), + ], + handlers: [ + ExceptHandler( + ExceptHandlerExceptHandler { + range: 9..39, + type_: Some( + Name( + ExprName { + range: 16..25, + id: "Exception", + ctx: Load, + }, + ), + ), + name: Some( + Identifier { + id: "match", + range: 29..34, + }, + ), + body: [ + Expr( + StmtExpr { + range: 36..39, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 36..39, + }, + ), + }, 
+ ), + ], + }, + ), + ExceptHandler( + ExceptHandlerExceptHandler { + range: 40..69, + type_: Some( + Name( + ExprName { + range: 47..56, + id: "Exception", + ctx: Load, + }, + ), + ), + name: Some( + Identifier { + id: "case", + range: 60..64, + }, + ), + body: [ + Expr( + StmtExpr { + range: 66..69, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 66..69, + }, + ), + }, + ), + ], + }, + ), + ExceptHandler( + ExceptHandlerExceptHandler { + range: 70..99, + type_: Some( + Name( + ExprName { + range: 77..86, + id: "Exception", + ctx: Load, + }, + ), + ), + name: Some( + Identifier { + id: "type", + range: 90..94, + }, + ), + body: [ + Expr( + StmtExpr { + range: 96..99, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 96..99, + }, + ), + }, + ), + ], + }, + ), + ], + orelse: [], + finalbody: [], + is_star: false, + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@from_import_soft_keyword_module_name.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@from_import_soft_keyword_module_name.py.snap new file mode 100644 index 0000000000000..9ab3b52aba38a --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@from_import_soft_keyword_module_name.py.snap @@ -0,0 +1,103 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/from_import_soft_keyword_module_name.py +--- +## AST + +``` +Module( + ModModule { + range: 0..104, + body: [ + ImportFrom( + StmtImportFrom { + range: 0..25, + module: Some( + Identifier { + id: "match", + range: 5..10, + }, + ), + names: [ + Alias { + range: 18..25, + name: Identifier { + id: "pattern", + range: 18..25, + }, + asname: None, + }, + ], + level: 0, + }, + ), + ImportFrom( + StmtImportFrom { + range: 26..46, + module: Some( + Identifier { + id: "type", + range: 31..35, + }, + ), + names: [ + Alias { + range: 43..46, + name: Identifier { + id: "bar", + range: 43..46, + }, + asname: None, + }, + ], + level: 0, + }, + ), + ImportFrom( + StmtImportFrom { + range: 47..71, + module: Some( + Identifier { + id: "case", + range: 52..56, + }, + ), + names: [ + Alias { + range: 64..71, + name: Identifier { + id: "pattern", + range: 64..71, + }, + asname: None, + }, + ], + level: 0, + }, + ), + ImportFrom( + StmtImportFrom { + range: 72..103, + module: Some( + Identifier { + id: "match.type.case", + range: 77..92, + }, + ), + names: [ + Alias { + range: 100..103, + name: Identifier { + id: "foo", + range: 100..103, + }, + asname: None, + }, + ], + level: 0, + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@import_as_name_soft_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@import_as_name_soft_keyword.py.snap new file mode 100644 index 0000000000000..b4e8a5ae633e5 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@import_as_name_soft_keyword.py.snap @@ -0,0 +1,75 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/import_as_name_soft_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..58, + body: [ + Import( + StmtImport { + range: 0..19, + names: [ + Alias { + range: 7..19, + name: Identifier { + id: "foo", + range: 7..10, + }, + asname: Some( + Identifier { + id: "match", + range: 14..19, + }, + ), + }, + ], + }, + ), + Import( + StmtImport { + range: 20..38, + names: [ + Alias { + range: 27..38, + name: Identifier { + id: "bar", + range: 
27..30, + }, + asname: Some( + Identifier { + id: "case", + range: 34..38, + }, + ), + }, + ], + }, + ), + Import( + StmtImport { + range: 39..57, + names: [ + Alias { + range: 46..57, + name: Identifier { + id: "baz", + range: 46..49, + }, + asname: Some( + Identifier { + id: "type", + range: 53..57, + }, + ), + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_as_pattern_soft_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_as_pattern_soft_keyword.py.snap new file mode 100644 index 0000000000000..eecf69925d987 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_as_pattern_soft_keyword.py.snap @@ -0,0 +1,113 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_as_pattern_soft_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..69, + body: [ + Match( + StmtMatch { + range: 0..68, + subject: Name( + ExprName { + range: 6..9, + id: "foo", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 15..29, + pattern: MatchAs( + PatternMatchAs { + range: 20..24, + pattern: None, + name: Some( + Identifier { + id: "case", + range: 20..24, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 26..29, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 26..29, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 34..49, + pattern: MatchAs( + PatternMatchAs { + range: 39..44, + pattern: None, + name: Some( + Identifier { + id: "match", + range: 39..44, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 46..49, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 46..49, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 54..68, + pattern: MatchAs( + PatternMatchAs { + range: 59..63, + pattern: None, + name: Some( + Identifier { + id: "type", + range: 59..63, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 65..68, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 65..68, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_attr_pattern_soft_keyword.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_attr_pattern_soft_keyword.py.snap new file mode 100644 index 0000000000000..fb3410108d3e7 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_attr_pattern_soft_keyword.py.snap @@ -0,0 +1,231 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_attr_pattern_soft_keyword.py +--- +## AST + +``` +Module( + ModModule { + range: 0..131, + body: [ + Match( + StmtMatch { + range: 0..130, + subject: Name( + ExprName { + range: 6..9, + id: "foo", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 15..34, + pattern: MatchValue( + PatternMatchValue { + range: 20..29, + value: Attribute( + ExprAttribute { + range: 20..29, + value: Name( + ExprName { + range: 20..25, + id: "match", + ctx: Load, + }, + ), + attr: Identifier { + id: "bar", + range: 26..29, + }, + ctx: Load, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 31..34, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 31..34, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 39..57, + pattern: MatchValue( + PatternMatchValue { + range: 44..52, + value: Attribute( + ExprAttribute { + range: 44..52, + value: Name( + 
ExprName { + range: 44..48, + id: "case", + ctx: Load, + }, + ), + attr: Identifier { + id: "bar", + range: 49..52, + }, + ctx: Load, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 54..57, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 54..57, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 62..80, + pattern: MatchValue( + PatternMatchValue { + range: 67..75, + value: Attribute( + ExprAttribute { + range: 67..75, + value: Name( + ExprName { + range: 67..71, + id: "type", + ctx: Load, + }, + ), + attr: Identifier { + id: "bar", + range: 72..75, + }, + ctx: Load, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 77..80, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 77..80, + }, + ), + }, + ), + ], + }, + MatchCase { + range: 85..130, + pattern: MatchValue( + PatternMatchValue { + range: 90..125, + value: Attribute( + ExprAttribute { + range: 90..125, + value: Attribute( + ExprAttribute { + range: 90..119, + value: Attribute( + ExprAttribute { + range: 90..114, + value: Attribute( + ExprAttribute { + range: 90..109, + value: Attribute( + ExprAttribute { + range: 90..105, + value: Attribute( + ExprAttribute { + range: 90..100, + value: Name( + ExprName { + range: 90..95, + id: "match", + ctx: Load, + }, + ), + attr: Identifier { + id: "case", + range: 96..100, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "type", + range: 101..105, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "bar", + range: 106..109, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "type", + range: 110..114, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "case", + range: 115..119, + }, + ctx: Load, + }, + ), + attr: Identifier { + id: "match", + range: 120..125, + }, + ctx: Load, + }, + ), + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 127..130, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 127..130, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_1.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_1.py.snap new file mode 100644 index 0000000000000..21dd833fc8031 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_1.py.snap @@ -0,0 +1,44 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_1.py +--- +## AST + +``` +Module( + ModModule { + range: 0..18, + body: [ + Expr( + StmtExpr { + range: 0..17, + value: Compare( + ExprCompare { + range: 0..17, + left: Name( + ExprName { + range: 0..5, + id: "match", + ctx: Load, + }, + ), + ops: [ + NotIn, + ], + comparators: [ + Name( + ExprName { + range: 13..17, + id: "case", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_2.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_2.py.snap new file mode 100644 index 0000000000000..c2023f5c4ac3a --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_identifier_2.py.snap @@ -0,0 +1,319 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_identifier_2.py +--- +## AST + +``` +Module( + ModModule { + range: 0..149, + body: [ + 
Expr( + StmtExpr { + range: 0..5, + value: Name( + ExprName { + range: 0..5, + id: "match", + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 6..18, + value: Compare( + ExprCompare { + range: 6..18, + left: Name( + ExprName { + range: 6..11, + id: "match", + ctx: Load, + }, + ), + ops: [ + NotEq, + ], + comparators: [ + Name( + ExprName { + range: 15..18, + id: "foo", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + Expr( + StmtExpr { + range: 19..31, + value: Tuple( + ExprTuple { + range: 19..31, + elts: [ + Name( + ExprName { + range: 20..23, + id: "foo", + ctx: Load, + }, + ), + Name( + ExprName { + range: 25..30, + id: "match", + ctx: Load, + }, + ), + ], + ctx: Load, + parenthesized: true, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 32..44, + value: List( + ExprList { + range: 32..44, + elts: [ + Name( + ExprName { + range: 33..36, + id: "foo", + ctx: Load, + }, + ), + Name( + ExprName { + range: 38..43, + id: "match", + ctx: Load, + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 45..57, + value: Set( + ExprSet { + range: 45..57, + elts: [ + Name( + ExprName { + range: 46..49, + id: "foo", + ctx: Load, + }, + ), + Name( + ExprName { + range: 51..56, + id: "match", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + Expr( + StmtExpr { + range: 58..63, + value: Name( + ExprName { + range: 58..63, + id: "match", + ctx: Load, + }, + ), + }, + ), + AnnAssign( + StmtAnnAssign { + range: 65..75, + target: Name( + ExprName { + range: 65..70, + id: "match", + ctx: Store, + }, + ), + annotation: Name( + ExprName { + range: 72..75, + id: "int", + ctx: Load, + }, + ), + value: None, + simple: true, + }, + ), + Expr( + StmtExpr { + range: 76..82, + value: Tuple( + ExprTuple { + range: 76..82, + elts: [ + Name( + ExprName { + range: 76..81, + id: "match", + ctx: Load, + }, + ), + ], + ctx: Load, + parenthesized: false, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 83..92, + value: Attribute( + ExprAttribute { + range: 83..92, + value: Name( + ExprName { + range: 83..88, + id: "match", + ctx: Load, + }, + ), + attr: Identifier { + id: "foo", + range: 89..92, + }, + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 93..104, + value: BinOp( + ExprBinOp { + range: 93..104, + left: Name( + ExprName { + range: 93..98, + id: "match", + ctx: Load, + }, + ), + op: Div, + right: Name( + ExprName { + range: 101..104, + id: "foo", + ctx: Load, + }, + ), + }, + ), + }, + ), + Expr( + StmtExpr { + range: 105..117, + value: BinOp( + ExprBinOp { + range: 105..117, + left: Name( + ExprName { + range: 105..110, + id: "match", + ctx: Load, + }, + ), + op: LShift, + right: Name( + ExprName { + range: 114..117, + id: "foo", + ctx: Load, + }, + ), + }, + ), + }, + ), + Expr( + StmtExpr { + range: 118..131, + value: BoolOp( + ExprBoolOp { + range: 118..131, + op: And, + values: [ + Name( + ExprName { + range: 118..123, + id: "match", + ctx: Load, + }, + ), + Name( + ExprName { + range: 128..131, + id: "foo", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + Expr( + StmtExpr { + range: 132..148, + value: Compare( + ExprCompare { + range: 132..148, + left: Name( + ExprName { + range: 132..137, + id: "match", + ctx: Load, + }, + ), + ops: [ + IsNot, + ], + comparators: [ + Name( + ExprName { + range: 145..148, + id: "foo", + ctx: Load, + }, + ), + ], + }, + ), + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_1.py.snap 
b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_1.py.snap new file mode 100644 index 0000000000000..b25b756c8ceba --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_1.py.snap @@ -0,0 +1,578 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_1.py +--- +## AST + +``` +Module( + ModModule { + range: 0..358, + body: [ + Match( + StmtMatch { + range: 0..26, + subject: Name( + ExprName { + range: 6..9, + id: "foo", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 15..26, + pattern: MatchAs( + PatternMatchAs { + range: 20..21, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 23..26, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 23..26, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 27..51, + subject: NumberLiteral( + ExprNumberLiteral { + range: 33..34, + value: Int( + 1, + ), + }, + ), + cases: [ + MatchCase { + range: 40..51, + pattern: MatchAs( + PatternMatchAs { + range: 45..46, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 48..51, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 48..51, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 52..78, + subject: NumberLiteral( + ExprNumberLiteral { + range: 58..61, + value: Float( + 1.0, + ), + }, + ), + cases: [ + MatchCase { + range: 67..78, + pattern: MatchAs( + PatternMatchAs { + range: 72..73, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 75..78, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 75..78, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 79..104, + subject: NumberLiteral( + ExprNumberLiteral { + range: 85..87, + value: Complex { + real: 0.0, + imag: 1.0, + }, + }, + ), + cases: [ + MatchCase { + range: 93..104, + pattern: MatchAs( + PatternMatchAs { + range: 98..99, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 101..104, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 101..104, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 105..133, + subject: StringLiteral( + ExprStringLiteral { + range: 111..116, + value: StringLiteralValue { + inner: Single( + StringLiteral { + range: 111..116, + value: "foo", + flags: StringLiteralFlags { + quote_style: Double, + prefix: Empty, + triple_quoted: false, + }, + }, + ), + }, + }, + ), + cases: [ + MatchCase { + range: 122..133, + pattern: MatchAs( + PatternMatchAs { + range: 127..128, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 130..133, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 130..133, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 134..167, + subject: FString( + ExprFString { + range: 140..150, + value: FStringValue { + inner: Single( + FString( + FString { + range: 140..150, + elements: [ + Literal( + FStringLiteralElement { + range: 142..146, + value: "foo ", + }, + ), + Expression( + FStringExpressionElement { + range: 146..149, + expression: Name( + ExprName { + range: 147..148, + id: "x", + ctx: Load, + }, + ), + debug_text: None, + conversion: None, + format_spec: None, + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + 
triple_quoted: false, + }, + }, + ), + ), + }, + }, + ), + cases: [ + MatchCase { + range: 156..167, + pattern: MatchAs( + PatternMatchAs { + range: 161..162, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 164..167, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 164..167, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 168..197, + subject: Set( + ExprSet { + range: 174..180, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 175..176, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 178..179, + value: Int( + 2, + ), + }, + ), + ], + }, + ), + cases: [ + MatchCase { + range: 186..197, + pattern: MatchAs( + PatternMatchAs { + range: 191..192, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 194..197, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 194..197, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 198..225, + subject: UnaryOp( + ExprUnaryOp { + range: 204..208, + op: Invert, + operand: Name( + ExprName { + range: 205..208, + id: "foo", + ctx: Load, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 214..225, + pattern: MatchAs( + PatternMatchAs { + range: 219..220, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 222..225, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 222..225, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 226..252, + subject: EllipsisLiteral( + ExprEllipsisLiteral { + range: 232..235, + }, + ), + cases: [ + MatchCase { + range: 241..252, + pattern: MatchAs( + PatternMatchAs { + range: 246..247, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 249..252, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 249..252, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 253..283, + subject: UnaryOp( + ExprUnaryOp { + range: 259..266, + op: Not, + operand: Name( + ExprName { + range: 263..266, + id: "foo", + ctx: Load, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 272..283, + pattern: MatchAs( + PatternMatchAs { + range: 277..278, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 280..283, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 280..283, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 284..318, + subject: Await( + ExprAwait { + range: 290..301, + value: Call( + ExprCall { + range: 296..301, + func: Name( + ExprName { + range: 296..299, + id: "foo", + ctx: Load, + }, + ), + arguments: Arguments { + range: 299..301, + args: [], + keywords: [], + }, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 307..318, + pattern: MatchAs( + PatternMatchAs { + range: 312..313, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 315..318, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 315..318, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 319..357, + subject: Lambda( + ExprLambda { + range: 325..340, + parameters: Some( + Parameters { + range: 332..335, + posonlyargs: [], + args: [ + ParameterWithDefault { + range: 332..335, + parameter: Parameter { + range: 332..335, + name: Identifier { + id: "foo", + range: 332..335, + }, + annotation: None, + }, + default: None, + }, + ], + vararg: None, + kwonlyargs: [], + 
kwarg: None, + }, + ), + body: Name( + ExprName { + range: 337..340, + id: "foo", + ctx: Load, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 346..357, + pattern: MatchAs( + PatternMatchAs { + range: 351..352, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 354..357, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 354..357, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_2.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_2.py.snap new file mode 100644 index 0000000000000..88a69846f4955 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_2.py.snap @@ -0,0 +1,233 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_2.py +--- +## AST + +``` +Module( + ModModule { + range: 0..170, + body: [ + Match( + StmtMatch { + range: 0..28, + subject: Name( + ExprName { + range: 6..11, + id: "match", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 17..28, + pattern: MatchAs( + PatternMatchAs { + range: 22..23, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 25..28, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 25..28, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 29..56, + subject: Name( + ExprName { + range: 35..39, + id: "case", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 45..56, + pattern: MatchAs( + PatternMatchAs { + range: 50..51, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 53..56, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 53..56, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 57..84, + subject: Name( + ExprName { + range: 63..67, + id: "type", + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 73..84, + pattern: MatchAs( + PatternMatchAs { + range: 78..79, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 81..84, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 81..84, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 85..112, + subject: NoneLiteral( + ExprNoneLiteral { + range: 91..95, + }, + ), + cases: [ + MatchCase { + range: 101..112, + pattern: MatchAs( + PatternMatchAs { + range: 106..107, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 109..112, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 109..112, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 113..140, + subject: BooleanLiteral( + ExprBooleanLiteral { + range: 119..123, + value: true, + }, + ), + cases: [ + MatchCase { + range: 129..140, + pattern: MatchAs( + PatternMatchAs { + range: 134..135, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 137..140, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 137..140, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Match( + StmtMatch { + range: 141..169, + subject: BooleanLiteral( + ExprBooleanLiteral { + range: 147..152, + value: false, + }, + ), + cases: [ + MatchCase { + range: 158..169, + pattern: MatchAs( + PatternMatchAs { + range: 163..164, + pattern: None, + name: None, + }, + ), + guard: None, 
+ body: [ + Expr( + StmtExpr { + range: 166..169, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 166..169, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_or_identifier.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_or_identifier.py.snap new file mode 100644 index 0000000000000..67f1d122bc196 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@match_classify_as_keyword_or_identifier.py.snap @@ -0,0 +1,291 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/match_classify_as_keyword_or_identifier.py +--- +## AST + +``` +Module( + ModModule { + range: 0..225, + body: [ + Expr( + StmtExpr { + range: 0..12, + value: Call( + ExprCall { + range: 0..12, + func: Name( + ExprName { + range: 0..5, + id: "match", + ctx: Load, + }, + ), + arguments: Arguments { + range: 6..12, + args: [ + NumberLiteral( + ExprNumberLiteral { + range: 7..8, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 10..11, + value: Int( + 2, + ), + }, + ), + ], + keywords: [], + }, + }, + ), + }, + ), + Match( + StmtMatch { + range: 27..67, + subject: Tuple( + ExprTuple { + range: 33..39, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 34..35, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 37..38, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + parenthesized: true, + }, + ), + cases: [ + MatchCase { + range: 56..67, + pattern: MatchAs( + PatternMatchAs { + range: 61..62, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 64..67, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 64..67, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Expr( + StmtExpr { + range: 68..78, + value: Subscript( + ExprSubscript { + range: 68..78, + value: Name( + ExprName { + range: 68..73, + id: "match", + ctx: Load, + }, + ), + slice: Slice( + ExprSlice { + range: 75..77, + lower: Some( + NumberLiteral( + ExprNumberLiteral { + range: 75..76, + value: Int( + 1, + ), + }, + ), + ), + upper: None, + step: None, + }, + ), + ctx: Load, + }, + ), + }, + ), + Match( + StmtMatch { + range: 93..133, + subject: List( + ExprList { + range: 99..105, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 100..101, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 103..104, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + }, + ), + cases: [ + MatchCase { + range: 122..133, + pattern: MatchAs( + PatternMatchAs { + range: 127..128, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 130..133, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 130..133, + }, + ), + }, + ), + ], + }, + ], + }, + ), + Expr( + StmtExpr { + range: 134..145, + value: BinOp( + ExprBinOp { + range: 134..145, + left: Name( + ExprName { + range: 134..139, + id: "match", + ctx: Load, + }, + ), + op: Mult, + right: Name( + ExprName { + range: 142..145, + id: "foo", + ctx: Load, + }, + ), + }, + ), + }, + ), + Expr( + StmtExpr { + range: 160..171, + value: BinOp( + ExprBinOp { + range: 160..171, + left: Name( + ExprName { + range: 160..165, + id: "match", + ctx: Load, + }, + ), + op: Sub, + right: Name( + ExprName { + range: 168..171, + id: "foo", + ctx: Load, + }, + ), + }, + ), + }, + ), + Match( + StmtMatch { + range: 
186..224, + subject: UnaryOp( + ExprUnaryOp { + range: 192..196, + op: USub, + operand: Name( + ExprName { + range: 193..196, + id: "foo", + ctx: Load, + }, + ), + }, + ), + cases: [ + MatchCase { + range: 213..224, + pattern: MatchAs( + PatternMatchAs { + range: 218..219, + pattern: None, + name: None, + }, + ), + guard: None, + body: [ + Expr( + StmtExpr { + range: 221..224, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 221..224, + }, + ), + }, + ), + ], + }, + ], + }, + ), + ], + }, +) +``` diff --git a/crates/ruff_python_semantic/src/analyze/type_inference.rs b/crates/ruff_python_semantic/src/analyze/type_inference.rs index e2ecd8690e3aa..6f7dfb0469a2c 100644 --- a/crates/ruff_python_semantic/src/analyze/type_inference.rs +++ b/crates/ruff_python_semantic/src/analyze/type_inference.rs @@ -428,12 +428,12 @@ impl NumberLike { #[cfg(test)] mod tests { - use ruff_python_ast::Expr; - use ruff_python_parser::parse_expression; + use ruff_python_ast::ModExpression; + use ruff_python_parser::{parse_expression, Parsed}; use crate::analyze::type_inference::{NumberLike, PythonType, ResolvedPythonType}; - fn parse(expression: &str) -> Expr { + fn parse(expression: &str) -> Parsed<ModExpression> { parse_expression(expression).unwrap() } @@ -441,95 +441,95 @@ mod tests { fn type_inference() { // Atoms. assert_eq!( - ResolvedPythonType::from(&parse("1")), + ResolvedPythonType::from(parse("1").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("'Hello, world'")), + ResolvedPythonType::from(parse("'Hello, world'").expr()), ResolvedPythonType::Atom(PythonType::String) ); assert_eq!( - ResolvedPythonType::from(&parse("b'Hello, world'")), + ResolvedPythonType::from(parse("b'Hello, world'").expr()), ResolvedPythonType::Atom(PythonType::Bytes) ); assert_eq!( - ResolvedPythonType::from(&parse("'Hello' % 'world'")), + ResolvedPythonType::from(parse("'Hello' % 'world'").expr()), ResolvedPythonType::Atom(PythonType::String) ); // Boolean operators. assert_eq!( - ResolvedPythonType::from(&parse("1 and 2")), + ResolvedPythonType::from(parse("1 and 2").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 and True")), + ResolvedPythonType::from(parse("1 and True").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); // Binary operators. assert_eq!( - ResolvedPythonType::from(&parse("1.0 * 2")), + ResolvedPythonType::from(parse("1.0 * 2").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("2 * 1.0")), + ResolvedPythonType::from(parse("2 * 1.0").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("1.0 * 2j")), + ResolvedPythonType::from(parse("1.0 * 2j").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Complex)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 / True")), + ResolvedPythonType::from(parse("1 / True").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 / 2")), + ResolvedPythonType::from(parse("1 / 2").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("{1, 2} - {2}")), + ResolvedPythonType::from(parse("{1, 2} - {2}").expr()), ResolvedPythonType::Atom(PythonType::Set) ); // Unary operators.
assert_eq!( - ResolvedPythonType::from(&parse("-1")), + ResolvedPythonType::from(parse("-1").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("-1.0")), + ResolvedPythonType::from(parse("-1.0").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("-1j")), + ResolvedPythonType::from(parse("-1j").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Complex)) ); assert_eq!( - ResolvedPythonType::from(&parse("-True")), + ResolvedPythonType::from(parse("-True").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("not 'Hello'")), + ResolvedPythonType::from(parse("not 'Hello'").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Bool)) ); assert_eq!( - ResolvedPythonType::from(&parse("not x.y.z")), + ResolvedPythonType::from(parse("not x.y.z").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Bool)) ); // Conditional expressions. assert_eq!( - ResolvedPythonType::from(&parse("1 if True else 2")), + ResolvedPythonType::from(parse("1 if True else 2").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 if True else 2.0")), + ResolvedPythonType::from(parse("1 if True else 2.0").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Float)) ); assert_eq!( - ResolvedPythonType::from(&parse("1 if True else False")), + ResolvedPythonType::from(parse("1 if True else False").expr()), ResolvedPythonType::Atom(PythonType::Number(NumberLike::Integer)) ); } diff --git a/crates/ruff_python_trivia_integration_tests/Cargo.toml b/crates/ruff_python_trivia_integration_tests/Cargo.toml index 9e0480a7e9540..7089c32214b17 100644 --- a/crates/ruff_python_trivia_integration_tests/Cargo.toml +++ b/crates/ruff_python_trivia_integration_tests/Cargo.toml @@ -12,7 +12,6 @@ license.workspace = true [dependencies] [dev-dependencies] -ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } ruff_python_trivia = { workspace = true } ruff_source_file = { workspace = true } diff --git a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs index fe6cc47ac9d7d..8bc8c5eb4c579 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs @@ -1,5 +1,4 @@ -use ruff_python_index::Indexer; -use ruff_python_parser::{tokenize, Mode}; +use ruff_python_parser::{parse_unchecked, Mode}; use ruff_source_file::Locator; use ruff_text_size::TextSize; @@ -7,12 +6,11 @@ use ruff_text_size::TextSize; fn block_comments_two_line_block_at_start() { // arrange let source = "# line 1\n# line 2\n"; - let tokens = tokenize(source, Mode::Module); + let parsed = parse_unchecked(source, Mode::Module); let locator = Locator::new(source); - let indexer = Indexer::from_tokens(&tokens, &locator); // act - let block_comments = indexer.comment_ranges().block_comments(&locator); + let block_comments = parsed.comment_ranges().block_comments(&locator); // assert assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]); @@ -22,12 +20,11 @@ fn block_comments_two_line_block_at_start() { fn block_comments_indented_block() { // arrange let source = " # line 1\n # line 2\n"; - let tokens = tokenize(source, Mode::Module); + let 
parsed = parse_unchecked(source, Mode::Module);
     let locator = Locator::new(source);
-    let indexer = Indexer::from_tokens(&tokens, &locator);
 
     // act
-    let block_comments = indexer.comment_ranges().block_comments(&locator);
+    let block_comments = parsed.comment_ranges().block_comments(&locator);
 
     // assert
     assert_eq!(block_comments, vec![TextSize::new(4), TextSize::new(17)]);
@@ -37,12 +34,11 @@ fn block_comments_indented_block() {
 fn block_comments_single_line_is_not_a_block() {
     // arrange
     let source = "\n";
-    let tokens = tokenize(source, Mode::Module);
+    let parsed = parse_unchecked(source, Mode::Module);
     let locator = Locator::new(source);
-    let indexer = Indexer::from_tokens(&tokens, &locator);
 
     // act
-    let block_comments = indexer.comment_ranges().block_comments(&locator);
+    let block_comments = parsed.comment_ranges().block_comments(&locator);
 
     // assert
     assert_eq!(block_comments, Vec::<TextSize>::new());
@@ -52,12 +48,11 @@ fn block_comments_single_line_is_not_a_block() {
 fn block_comments_lines_with_code_not_a_block() {
     // arrange
     let source = "x = 1 # line 1\ny = 2 # line 2\n";
-    let tokens = tokenize(source, Mode::Module);
+    let parsed = parse_unchecked(source, Mode::Module);
     let locator = Locator::new(source);
-    let indexer = Indexer::from_tokens(&tokens, &locator);
 
     // act
-    let block_comments = indexer.comment_ranges().block_comments(&locator);
+    let block_comments = parsed.comment_ranges().block_comments(&locator);
 
     // assert
     assert_eq!(block_comments, Vec::<TextSize>::new());
@@ -67,12 +62,11 @@ fn block_comments_lines_with_code_not_a_block() {
 fn block_comments_sequential_lines_not_in_block() {
     // arrange
     let source = "    # line 1\n        # line 2\n";
-    let tokens = tokenize(source, Mode::Module);
+    let parsed = parse_unchecked(source, Mode::Module);
     let locator = Locator::new(source);
-    let indexer = Indexer::from_tokens(&tokens, &locator);
 
     // act
-    let block_comments = indexer.comment_ranges().block_comments(&locator);
+    let block_comments = parsed.comment_ranges().block_comments(&locator);
 
     // assert
     assert_eq!(block_comments, Vec::<TextSize>::new());
@@ -87,12 +81,11 @@ fn block_comments_lines_in_triple_quotes_not_a_block() {
     # line 2
     """
     "#;
-    let tokens = tokenize(source, Mode::Module);
+    let parsed = parse_unchecked(source, Mode::Module);
     let locator = Locator::new(source);
-    let indexer = Indexer::from_tokens(&tokens, &locator);
 
     // act
-    let block_comments = indexer.comment_ranges().block_comments(&locator);
+    let block_comments = parsed.comment_ranges().block_comments(&locator);
 
     // assert
     assert_eq!(block_comments, Vec::<TextSize>::new());
@@ -124,12 +117,11 @@ y = 2 # do not form a block comment
 # therefore do not form a block comment
 """
 "#;
-    let tokens = tokenize(source, Mode::Module);
+    let parsed = parse_unchecked(source, Mode::Module);
     let locator = Locator::new(source);
-    let indexer = Indexer::from_tokens(&tokens, &locator);
 
     // act
-    let block_comments = indexer.comment_ranges().block_comments(&locator);
+    let block_comments = parsed.comment_ranges().block_comments(&locator);
 
     // assert
     assert_eq!(
diff --git a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
index 5ac4296ea68fe..7db3766463b9c 100644
--- a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
+++ b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
@@ -1,7 +1,6 @@
 use insta::assert_debug_snapshot;
 
-use ruff_python_parser::lexer::lex;
-use ruff_python_parser::{Mode, Tok};
+use ruff_python_parser::{parse_unchecked, Mode};
 use ruff_python_trivia::{lines_after, lines_before, SimpleToken, SimpleTokenizer};
 use ruff_python_trivia::{BackwardsTokenizer, SimpleTokenKind};
 use ruff_text_size::{TextLen, TextRange, TextSize};
@@ -23,17 +22,8 @@ impl TokenizationTestCase {
     }
 
     fn tokenize_reverse(&self) -> Vec<SimpleToken> {
-        let comment_ranges: Vec<_> = lex(self.source, Mode::Module)
-            .filter_map(|result| {
-                let (token, range) = result.expect("Input to be a valid python program.");
-                if matches!(token, Tok::Comment(_)) {
-                    Some(range)
-                } else {
-                    None
-                }
-            })
-            .collect();
-        BackwardsTokenizer::new(self.source, self.range, &comment_ranges).collect()
+        let parsed = parse_unchecked(self.source, Mode::Module);
+        BackwardsTokenizer::new(self.source, self.range, parsed.comment_ranges()).collect()
     }
 
     fn tokens(&self) -> &[SimpleToken] {
diff --git a/crates/ruff_python_trivia_integration_tests/tests/whitespace.rs b/crates/ruff_python_trivia_integration_tests/tests/whitespace.rs
index 709a3a3d189b6..d73e2052b3ef4 100644
--- a/crates/ruff_python_trivia_integration_tests/tests/whitespace.rs
+++ b/crates/ruff_python_trivia_integration_tests/tests/whitespace.rs
@@ -1,4 +1,4 @@
-use ruff_python_parser::{parse_suite, ParseError};
+use ruff_python_parser::{parse_module, ParseError};
 use ruff_python_trivia::has_trailing_content;
 use ruff_source_file::Locator;
 use ruff_text_size::Ranged;
@@ -6,26 +6,26 @@ use ruff_text_size::Ranged;
 #[test]
 fn trailing_content() -> Result<(), ParseError> {
     let contents = "x = 1";
-    let program = parse_suite(contents)?;
-    let stmt = program.first().unwrap();
+    let suite = parse_module(contents)?.into_suite();
+    let stmt = suite.first().unwrap();
     let locator = Locator::new(contents);
     assert!(!has_trailing_content(stmt.end(), &locator));
 
     let contents = "x = 1; y = 2";
-    let program = parse_suite(contents)?;
-    let stmt = program.first().unwrap();
+    let suite = parse_module(contents)?.into_suite();
+    let stmt = suite.first().unwrap();
     let locator = Locator::new(contents);
     assert!(has_trailing_content(stmt.end(), &locator));
 
     let contents = "x = 1 ";
-    let program = parse_suite(contents)?;
-    let stmt = program.first().unwrap();
+    let suite = parse_module(contents)?.into_suite();
+    let stmt = suite.first().unwrap();
     let locator = Locator::new(contents);
     assert!(!has_trailing_content(stmt.end(), &locator));
 
     let contents = "x = 1 # Comment";
-    let program = parse_suite(contents)?;
-    let stmt = program.first().unwrap();
+    let suite = parse_module(contents)?.into_suite();
+    let stmt = suite.first().unwrap();
     let locator = Locator::new(contents);
     assert!(!has_trailing_content(stmt.end(), &locator));
 
@@ -34,8 +34,8 @@ x = 1
 y = 2
 "
 .trim();
-    let program = parse_suite(contents)?;
-    let stmt = program.first().unwrap();
+    let suite = parse_module(contents)?.into_suite();
+    let stmt = suite.first().unwrap();
     let locator = Locator::new(contents);
     assert!(!has_trailing_content(stmt.end(), &locator));
 
diff --git a/crates/ruff_server/src/lint.rs b/crates/ruff_server/src/lint.rs
index b984143fa2f92..de6340d7f0453 100644
--- a/crates/ruff_server/src/lint.rs
+++ b/crates/ruff_server/src/lint.rs
@@ -7,7 +7,7 @@ use ruff_diagnostics::{Applicability, Diagnostic, DiagnosticKind, Edit, Fix};
 use ruff_linter::{
     directives::{extract_directives, Flags},
     generate_noqa_edits,
-    linter::{check_path, LinterResult, TokenSource},
+    linter::{check_path, LinterResult},
     packaging::detect_package_root,
     registry::AsRule,
     settings::flags,
@@ -16,7 +16,6 @@ use ruff_linter::{
 use ruff_notebook::Notebook;
 use ruff_python_codegen::Stylist;
 use
ruff_python_index::Indexer; -use ruff_python_parser::AsMode; use ruff_source_file::{LineIndex, Locator}; use ruff_text_size::{Ranged, TextRange}; use ruff_workspace::resolver::match_any_exclusion; @@ -95,8 +94,8 @@ pub(crate) fn check(query: &DocumentQuery, encoding: PositionEncoding) -> Diagno let source_type = query.source_type(); - // Tokenize once. - let tokens = ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode()); + // Parse once. + let parsed = ruff_python_parser::parse_unchecked_source(source_kind.source_code(), source_type); let index = LineIndex::from_source_text(source_kind.source_code()); @@ -104,13 +103,13 @@ pub(crate) fn check(query: &DocumentQuery, encoding: PositionEncoding) -> Diagno let locator = Locator::with_index(source_kind.source_code(), index.clone()); // Detect the current code style (lazily). - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); // Extra indices from the code. - let indexer = Indexer::from_tokens(&tokens, &locator); + let indexer = Indexer::from_tokens(parsed.tokens(), &locator); // Extract the `# noqa` and `# isort: skip` directives from the source. - let directives = extract_directives(&tokens, Flags::all(), &locator, &indexer); + let directives = extract_directives(&parsed, Flags::all(), &locator, &indexer); // Generate checks. let LinterResult { data, .. } = check_path( @@ -124,14 +123,14 @@ pub(crate) fn check(query: &DocumentQuery, encoding: PositionEncoding) -> Diagno flags::Noqa::Enabled, &source_kind, source_type, - TokenSource::Tokens(tokens), + &parsed, ); let noqa_edits = generate_noqa_edits( query.virtual_file_path(), data.as_slice(), &locator, - indexer.comment_ranges(), + parsed.comment_ranges(), &linter_settings.external, &directives.noqa_line_for, stylist.line_ending(), diff --git a/crates/ruff_wasm/Cargo.toml b/crates/ruff_wasm/Cargo.toml index 35ba4e102eab4..c8cdfc9e71a29 100644 --- a/crates/ruff_wasm/Cargo.toml +++ b/crates/ruff_wasm/Cargo.toml @@ -28,7 +28,6 @@ ruff_python_index = { workspace = true } ruff_python_parser = { workspace = true } ruff_source_file = { workspace = true } ruff_text_size = { workspace = true } -ruff_python_trivia = { workspace = true } ruff_workspace = { workspace = true } console_error_panic_hook = { workspace = true, optional = true } diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index 56843a82e0c5a..068975fe8393d 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -8,7 +8,7 @@ use ruff_formatter::printer::SourceMapGeneration; use ruff_formatter::{FormatResult, Formatted, IndentStyle}; use ruff_linter::directives; use ruff_linter::line_width::{IndentWidth, LineLength}; -use ruff_linter::linter::{check_path, LinterResult, TokenSource}; +use ruff_linter::linter::{check_path, LinterResult}; use ruff_linter::registry::AsRule; use ruff_linter::settings::types::PythonVersion; use ruff_linter::settings::{flags, DEFAULT_SELECTORS, DUMMY_VARIABLE_RGX}; @@ -16,9 +16,8 @@ use ruff_linter::source_kind::SourceKind; use ruff_python_ast::{Mod, PySourceType}; use ruff_python_codegen::Stylist; use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext, QuoteStyle}; -use ruff_python_index::{CommentRangesBuilder, Indexer}; -use ruff_python_parser::{parse_tokens, tokenize_all, AsMode, Mode, Program}; -use ruff_python_trivia::CommentRanges; +use ruff_python_index::Indexer; +use ruff_python_parser::{parse, parse_unchecked, parse_unchecked_source, Mode, 
Parsed};
 use ruff_source_file::{Locator, SourceLocation};
 use ruff_text_size::Ranged;
 use ruff_workspace::configuration::Configuration;
@@ -160,21 +159,21 @@ impl Workspace {
         // TODO(dhruvmanila): Support Jupyter Notebooks
         let source_kind = SourceKind::Python(contents.to_string());
 
-        // Tokenize once.
-        let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode());
+        // Parse once.
+        let parsed = parse_unchecked_source(source_kind.source_code(), source_type);
 
         // Map row and column locations to byte slices (lazily).
         let locator = Locator::new(contents);
 
         // Detect the current code style (lazily).
-        let stylist = Stylist::from_tokens(&tokens, &locator);
+        let stylist = Stylist::from_tokens(parsed.tokens(), &locator);
 
         // Extra indices from the code.
-        let indexer = Indexer::from_tokens(&tokens, &locator);
+        let indexer = Indexer::from_tokens(parsed.tokens(), &locator);
 
         // Extract the `# noqa` and `# isort: skip` directives from the source.
         let directives =
-            directives::extract_directives(&tokens, directives::Flags::empty(), &locator, &indexer);
+            directives::extract_directives(&parsed, directives::Flags::empty(), &locator, &indexer);
 
         // Generate checks.
         let LinterResult {
@@ -190,7 +189,7 @@ impl Workspace {
             flags::Noqa::Enabled,
             &source_kind,
             source_type,
-            TokenSource::Tokens(tokens),
+            &parsed,
         );
 
         let source_code = locator.to_source_code();
@@ -242,21 +241,25 @@ impl Workspace {
     pub fn comments(&self, contents: &str) -> Result<String, Error> {
         let parsed = ParsedModule::from_source(contents)?;
-        let comments = pretty_comments(&parsed.module, &parsed.comment_ranges, contents);
+        let comments = pretty_comments(
+            parsed.parsed.syntax(),
+            parsed.parsed.comment_ranges(),
+            contents,
+        );
 
         Ok(comments)
     }
 
     /// Parses the content and returns its AST
     pub fn parse(&self, contents: &str) -> Result<String, Error> {
-        let program = Program::parse_str(contents, Mode::Module);
+        let parsed = parse_unchecked(contents, Mode::Module);
 
-        Ok(format!("{:#?}", program.into_ast()))
+        Ok(format!("{:#?}", parsed.into_syntax()))
     }
 
     pub fn tokens(&self, contents: &str) -> Result<String, Error> {
-        let tokens: Vec<_> = ruff_python_parser::lexer::lex(contents, Mode::Module).collect();
+        let parsed = parse_unchecked(contents, Mode::Module);
 
-        Ok(format!("{tokens:#?}"))
+        Ok(format!("{:#?}", parsed.tokens()))
     }
 }
 
@@ -266,25 +269,14 @@ pub(crate) fn into_error(err: E) -> Error {
 
 struct ParsedModule<'a> {
     source_code: &'a str,
-    module: Mod,
-    comment_ranges: CommentRanges,
+    parsed: Parsed<Mod>,
 }
 
 impl<'a> ParsedModule<'a> {
     fn from_source(source_code: &'a str) -> Result<Self, Error> {
-        let tokens: Vec<_> = tokenize_all(source_code, Mode::Module);
-        let mut comment_ranges = CommentRangesBuilder::default();
-
-        for (token, range) in tokens.iter().flatten() {
-            comment_ranges.visit_token(token, *range);
-        }
-        let comment_ranges = comment_ranges.finish();
-        let module = parse_tokens(tokens, source_code, Mode::Module).map_err(into_error)?;
-
         Ok(Self {
             source_code,
-            module,
-            comment_ranges,
+            parsed: parse(source_code, Mode::Module).map_err(into_error)?,
         })
     }
 
@@ -295,11 +287,6 @@ impl<'a> ParsedModule<'a> {
             .to_format_options(PySourceType::default(), self.source_code)
             .with_source_map_generation(SourceMapGeneration::Enabled);
 
-        format_module_ast(
-            &self.module,
-            &self.comment_ranges,
-            self.source_code,
-            options,
-        )
+        format_module_ast(&self.parsed, self.source_code, options)
     }
 }
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 28183f438d96c..f3e74c176f9db 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -24,6 +24,7 @@ ruff_python_index = { path =
"../crates/ruff_python_index" } ruff_python_parser = { path = "../crates/ruff_python_parser" } ruff_source_file = { path = "../crates/ruff_source_file" } ruff_python_formatter = { path = "../crates/ruff_python_formatter"} +ruff_text_size = { path = "../crates/ruff_text_size" } arbitrary = { version = "1.3.0", features = ["derive"] } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } diff --git a/fuzz/fuzz_targets/ruff_parse_simple.rs b/fuzz/fuzz_targets/ruff_parse_simple.rs index 657e8c1449ab6..805c04cd6753d 100644 --- a/fuzz/fuzz_targets/ruff_parse_simple.rs +++ b/fuzz/fuzz_targets/ruff_parse_simple.rs @@ -5,8 +5,9 @@ use libfuzzer_sys::{fuzz_target, Corpus}; use ruff_python_codegen::{Generator, Stylist}; -use ruff_python_parser::{lexer, parse_suite, Mode, ParseError}; +use ruff_python_parser::{parse_module, ParseError}; use ruff_source_file::Locator; +use ruff_text_size::Ranged; fn do_fuzz(case: &[u8]) -> Corpus { let Ok(code) = std::str::from_utf8(case) else { @@ -15,8 +16,8 @@ fn do_fuzz(case: &[u8]) -> Corpus { // just round-trip it once to trigger both parse and unparse let locator = Locator::new(code); - let python_ast = match parse_suite(code) { - Ok(stmts) => stmts, + let parsed = match parse_module(code) { + Ok(parsed) => parsed, Err(ParseError { location, .. }) => { let offset = location.start().to_usize(); assert!( @@ -28,38 +29,24 @@ fn do_fuzz(case: &[u8]) -> Corpus { } }; - let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect(); - - for maybe_token in tokens.iter() { - match maybe_token.as_ref() { - Ok((_, range)) => { - let start = range.start().to_usize(); - let end = range.end().to_usize(); - assert!( - code.is_char_boundary(start), - "Invalid start position {} (not at char boundary)", - start - ); - assert!( - code.is_char_boundary(end), - "Invalid end position {} (not at char boundary)", - end - ); - } - Err(err) => { - let offset = err.location().start().to_usize(); - assert!( - code.is_char_boundary(offset), - "Invalid error location {} (not at char boundary)", - offset - ); - } - } + for token in parsed.tokens() { + let start = token.start().to_usize(); + let end = token.end().to_usize(); + assert!( + code.is_char_boundary(start), + "Invalid start position {} (not at char boundary)", + start + ); + assert!( + code.is_char_boundary(end), + "Invalid end position {} (not at char boundary)", + end + ); } - let stylist = Stylist::from_tokens(&tokens, &locator); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); let mut generator: Generator = (&stylist).into(); - generator.unparse_suite(&python_ast); + generator.unparse_suite(parsed.suite()); Corpus::Keep } From 4a155e2b22378843ba57ba6a67916cd043857131 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 3 Jun 2024 18:28:35 +0530 Subject: [PATCH 21/25] Re-order lexer methods (#11716) ## Summary This PR re-orders the lexer methods in the following order: 1. `next_token` 2. `lex_token` 3. `eat_indentation` 4. `handle_indentation` 5. `skip_whitespace` 6. `consume_ascii_character` 7. `try_single_char_prefix` 8. `try_double_char_prefix` 9. `lex_identifier` 10. `lex_fstring_start` 11. `lex_fstring_middle_or_end` 12. `lex_string` 13. `lex_number` 14. `lex_number_radix` 15. `lex_decimal_number` 16. `radix_run` 17. `lex_comment` 18. `lex_ipython_escape_command` 19. 
`consume_end` Following was considered for the ordering: * 1 is the main entry point which delegates to 2 * 3, 4, 5 are all related to whitespace which is done first * 6 is the entrypoint for an ascii character which delegates to 9, 12, 13, 17, 18, 19 * Others are grouped around similar kind of methods --- crates/ruff_python_parser/src/lexer.rs | 1628 ++++++++++++------------ 1 file changed, 814 insertions(+), 814 deletions(-) diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 8933e4cb748a4..5b5bb3d213f0a 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -125,431 +125,597 @@ impl<'src> Lexer<'src> { self.current_flags } - /// Helper function to push the given error and return the [`TokenKind::Unknown`] token. - fn push_error(&mut self, error: LexicalError) -> TokenKind { - self.errors.push(error); - TokenKind::Unknown + /// Takes the token value corresponding to the current token out of the lexer, replacing it + /// with the default value. + /// + /// All the subsequent call to this method without moving the lexer would always return the + /// default value which is [`TokenValue::None`]. + pub(crate) fn take_value(&mut self) -> TokenValue { + std::mem::take(&mut self.current_value) } - /// Try lexing the single character string prefix, updating the token flags accordingly. - /// Returns `true` if it matches. - fn try_single_char_prefix(&mut self, first: char) -> bool { - match first { - 'f' | 'F' => self.current_flags |= TokenFlags::F_STRING, - 'u' | 'U' => self.current_flags |= TokenFlags::UNICODE_STRING, - 'b' | 'B' => self.current_flags |= TokenFlags::BYTE_STRING, - 'r' => self.current_flags |= TokenFlags::RAW_STRING_LOWERCASE, - 'R' => self.current_flags |= TokenFlags::RAW_STRING_UPPERCASE, - _ => return false, - } - true + /// Lex the next token. + pub fn next_token(&mut self) -> TokenKind { + self.cursor.start_token(); + self.current_value = TokenValue::None; + self.current_flags = TokenFlags::empty(); + self.current_kind = self.lex_token(); + self.current_range = self.token_range(); + self.current_kind } - /// Try lexing the double character string prefix, updating the token flags accordingly. - /// Returns `true` if it matches. - fn try_double_char_prefix(&mut self, value: [char; 2]) -> bool { - match value { - ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => { - self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_LOWERCASE; - } - ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => { - self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_UPPERCASE; - } - ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => { - self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_LOWERCASE; + fn lex_token(&mut self) -> TokenKind { + if let Some(fstring) = self.fstrings.current() { + if !fstring.is_in_expression(self.nesting) { + if let Some(token) = self.lex_fstring_middle_or_end() { + if matches!(token, TokenKind::FStringEnd) { + self.fstrings.pop(); + } + return token; + } } - ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => { - self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_UPPERCASE; + } + // Return dedent tokens until the current indentation level matches the indentation of the next token. 
+ else if let Some(indentation) = self.pending_indentation.take() { + match self.indentations.current().try_compare(indentation) { + Ok(Ordering::Greater) => { + self.pending_indentation = Some(indentation); + if self.indentations.dedent_one(indentation).is_err() { + return self.push_error(LexicalError::new( + LexicalErrorType::IndentationError, + self.token_range(), + )); + } + return TokenKind::Dedent; + } + Ok(_) => {} + Err(_) => { + return self.push_error(LexicalError::new( + LexicalErrorType::IndentationError, + self.token_range(), + )); + } } - _ => return false, } - true - } - /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix. - fn lex_identifier(&mut self, first: char) -> TokenKind { - // Detect potential string like rb'' b'' f'' u'' r'' - let quote = match (first, self.cursor.first()) { - (_, quote @ ('\'' | '"')) => self.try_single_char_prefix(first).then(|| { - self.cursor.bump(); - quote - }), - (_, second) if is_quote(self.cursor.second()) => { - self.try_double_char_prefix([first, second]).then(|| { - self.cursor.bump(); - // SAFETY: Safe because of the `is_quote` check in this match arm's guard - self.cursor.bump().unwrap() - }) + if self.state.is_after_newline() { + if let Some(indentation) = self.eat_indentation() { + return indentation; } - _ => None, - }; - - if let Some(quote) = quote { - if self.current_flags.is_f_string() { - return self.lex_fstring_start(quote); + } else { + if let Err(error) = self.skip_whitespace() { + return self.push_error(error); } - - return self.lex_string(quote); } - // Keep track of whether the identifier is ASCII-only or not. - // - // This is important because Python applies NFKC normalization to - // identifiers: https://docs.python.org/3/reference/lexical_analysis.html#identifiers. - // We need to therefore do the same in our lexer, but applying NFKC normalization - // unconditionally is extremely expensive. If we know an identifier is ASCII-only, - // (by far the most common case), we can skip NFKC normalization of the identifier. 
- let mut is_ascii = first.is_ascii(); - self.cursor - .eat_while(|c| is_identifier_continuation(c, &mut is_ascii)); - - let text = self.token_text(); + // The lexer might've skipped whitespaces, so update the start offset + self.cursor.start_token(); - if !is_ascii { - self.current_value = TokenValue::Name(text.nfkc().collect::().into_boxed_str()); - return TokenKind::Name; - } + if let Some(c) = self.cursor.bump() { + if c.is_ascii() { + self.consume_ascii_character(c) + } else if is_unicode_identifier_start(c) { + let identifier = self.lex_identifier(c); + self.state = State::Other; - match text { - "False" => TokenKind::False, - "None" => TokenKind::None, - "True" => TokenKind::True, - "and" => TokenKind::And, - "as" => TokenKind::As, - "assert" => TokenKind::Assert, - "async" => TokenKind::Async, - "await" => TokenKind::Await, - "break" => TokenKind::Break, - "case" => TokenKind::Case, - "class" => TokenKind::Class, - "continue" => TokenKind::Continue, - "def" => TokenKind::Def, - "del" => TokenKind::Del, - "elif" => TokenKind::Elif, - "else" => TokenKind::Else, - "except" => TokenKind::Except, - "finally" => TokenKind::Finally, - "for" => TokenKind::For, - "from" => TokenKind::From, - "global" => TokenKind::Global, - "if" => TokenKind::If, - "import" => TokenKind::Import, - "in" => TokenKind::In, - "is" => TokenKind::Is, - "lambda" => TokenKind::Lambda, - "match" => TokenKind::Match, - "nonlocal" => TokenKind::Nonlocal, - "not" => TokenKind::Not, - "or" => TokenKind::Or, - "pass" => TokenKind::Pass, - "raise" => TokenKind::Raise, - "return" => TokenKind::Return, - "try" => TokenKind::Try, - "type" => TokenKind::Type, - "while" => TokenKind::While, - "with" => TokenKind::With, - "yield" => TokenKind::Yield, - _ => { - self.current_value = TokenValue::Name(text.to_string().into_boxed_str()); - TokenKind::Name + identifier + } else { + self.push_error(LexicalError::new( + LexicalErrorType::UnrecognizedToken { tok: c }, + self.token_range(), + )) } + } else { + // Reached the end of the file. Emit a trailing newline token if not at the beginning of a logical line, + // empty the dedent stack, and finally, return the EndOfFile token. + self.consume_end() } } - /// Numeric lexing. The feast can start! 
- fn lex_number(&mut self, first: char) -> TokenKind { - if first == '0' { - if self.cursor.eat_if(|c| matches!(c, 'x' | 'X')).is_some() { - self.lex_number_radix(Radix::Hex) - } else if self.cursor.eat_if(|c| matches!(c, 'o' | 'O')).is_some() { - self.lex_number_radix(Radix::Octal) - } else if self.cursor.eat_if(|c| matches!(c, 'b' | 'B')).is_some() { - self.lex_number_radix(Radix::Binary) - } else { - self.lex_decimal_number(first) + fn eat_indentation(&mut self) -> Option { + let mut indentation = Indentation::root(); + + loop { + match self.cursor.first() { + ' ' => { + self.cursor.bump(); + indentation = indentation.add_space(); + } + '\t' => { + self.cursor.bump(); + indentation = indentation.add_tab(); + } + '\\' => { + self.cursor.bump(); + if self.cursor.eat_char('\r') { + self.cursor.eat_char('\n'); + } else if self.cursor.is_eof() { + return Some(self.push_error(LexicalError::new( + LexicalErrorType::Eof, + self.token_range(), + ))); + } else if !self.cursor.eat_char('\n') { + return Some(self.push_error(LexicalError::new( + LexicalErrorType::LineContinuationError, + self.token_range(), + ))); + } + indentation = Indentation::root(); + } + // Form feed + '\x0C' => { + self.cursor.bump(); + indentation = Indentation::root(); + } + _ => break, } - } else { - self.lex_decimal_number(first) } - } - /// Lex a hex/octal/decimal/binary number without a decimal point. - fn lex_number_radix(&mut self, radix: Radix) -> TokenKind { - #[cfg(debug_assertions)] - debug_assert!(matches!( - self.cursor.previous().to_ascii_lowercase(), - 'x' | 'o' | 'b' - )); + // Handle indentation if this is a new, not all empty, logical line + if !matches!(self.cursor.first(), '\n' | '\r' | '#' | EOF_CHAR) { + self.state = State::NonEmptyLogicalLine; - // Lex the portion of the token after the base prefix (e.g., `9D5` in `0x9D5`). - let mut number = LexedText::new(self.offset(), self.source); - self.radix_run(&mut number, radix); + // Set to false so that we don't handle indentation on the next call. + return self.handle_indentation(indentation); + } - // Extract the entire number, including the base prefix (e.g., `0x9D5`). - let token = &self.source[self.token_range()]; + None + } - let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) { - Ok(int) => int, - Err(err) => { - return self.push_error(LexicalError::new( - LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()), - self.token_range(), - )); - } - }; - self.current_value = TokenValue::Int(value); - TokenKind::Int - } - - /// Lex a normal number, that is, no octal, hex or binary number. - fn lex_decimal_number(&mut self, first_digit_or_dot: char) -> TokenKind { - #[cfg(debug_assertions)] - debug_assert!(self.cursor.previous().is_ascii_digit() || self.cursor.previous() == '.'); - let start_is_zero = first_digit_or_dot == '0'; + fn handle_indentation(&mut self, indentation: Indentation) -> Option { + let token = match self.indentations.current().try_compare(indentation) { + // Dedent + Ok(Ordering::Greater) => { + self.pending_indentation = Some(indentation); - let mut number = LexedText::new(self.token_start(), self.source); - if first_digit_or_dot != '.' { - number.push(first_digit_or_dot); - self.radix_run(&mut number, Radix::Decimal); - }; + if self.indentations.dedent_one(indentation).is_err() { + return Some(self.push_error(LexicalError::new( + LexicalErrorType::IndentationError, + self.token_range(), + ))); + }; - let is_float = if first_digit_or_dot == '.' 
|| self.cursor.eat_char('.') { - number.push('.'); + // The lexer might've eaten some whitespaces to calculate the `indentation`. For + // example: + // + // ```py + // if first: + // if second: + // pass + // foo + // # ^ + // ``` + // + // Here, the cursor is at `^` and the `indentation` contains the whitespaces before + // the `pass` token. + self.cursor.start_token(); - if self.cursor.eat_char('_') { - return self.push_error(LexicalError::new( - LexicalErrorType::OtherError("Invalid Syntax".to_string().into_boxed_str()), - TextRange::new(self.offset() - TextSize::new(1), self.offset()), - )); + Some(TokenKind::Dedent) } - self.radix_run(&mut number, Radix::Decimal); - true - } else { - // Normal number: - false - }; - - let is_float = match self.cursor.rest().as_bytes() { - [b'e' | b'E', b'0'..=b'9', ..] | [b'e' | b'E', b'-' | b'+', b'0'..=b'9', ..] => { - // 'e' | 'E' - number.push(self.cursor.bump().unwrap()); - - if let Some(sign) = self.cursor.eat_if(|c| matches!(c, '+' | '-')) { - number.push(sign); - } - - self.radix_run(&mut number, Radix::Decimal); + Ok(Ordering::Equal) => None, - true + // Indent + Ok(Ordering::Less) => { + self.indentations.indent(indentation); + Some(TokenKind::Indent) + } + Err(_) => { + return Some(self.push_error(LexicalError::new( + LexicalErrorType::IndentationError, + self.token_range(), + ))); } - _ => is_float, }; - if is_float { - // Improvement: Use `Cow` instead of pushing to value text - let Ok(value) = f64::from_str(number.as_str()) else { - return self.push_error(LexicalError::new( - LexicalErrorType::OtherError( - "Invalid decimal literal".to_string().into_boxed_str(), - ), - self.token_range(), - )); - }; + token + } - // Parse trailing 'j': - if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() { - self.current_value = TokenValue::Complex { - real: 0.0, - imag: value, - }; - TokenKind::Complex - } else { - self.current_value = TokenValue::Float(value); - TokenKind::Float - } - } else { - // Parse trailing 'j': - if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() { - let imag = f64::from_str(number.as_str()).unwrap(); - self.current_value = TokenValue::Complex { real: 0.0, imag }; - TokenKind::Complex - } else { - let value = match Int::from_str(number.as_str()) { - Ok(value) => { - if start_is_zero && value.as_u8() != Some(0) { - // Leading zeros in decimal integer literals are not permitted. 
- return self.push_error(LexicalError::new( - LexicalErrorType::OtherError( - "Invalid decimal integer literal" - .to_string() - .into_boxed_str(), - ), - self.token_range(), - )); - } - value - } - Err(err) => { - return self.push_error(LexicalError::new( - LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()), + fn skip_whitespace(&mut self) -> Result<(), LexicalError> { + loop { + match self.cursor.first() { + ' ' => { + self.cursor.bump(); + } + '\t' => { + self.cursor.bump(); + } + '\\' => { + self.cursor.bump(); + if self.cursor.eat_char('\r') { + self.cursor.eat_char('\n'); + } else if self.cursor.is_eof() { + return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range())); + } else if !self.cursor.eat_char('\n') { + return Err(LexicalError::new( + LexicalErrorType::LineContinuationError, self.token_range(), - )) + )); } - }; - self.current_value = TokenValue::Int(value); - TokenKind::Int + } + // Form feed + '\x0C' => { + self.cursor.bump(); + } + _ => break, } } + + Ok(()) } - /// Consume a sequence of numbers with the given radix, - /// the digits can be decorated with underscores - /// like this: '`1_2_3_4`' == '1234' - fn radix_run(&mut self, number: &mut LexedText, radix: Radix) { - loop { - if let Some(c) = self.cursor.eat_if(|c| radix.is_digit(c)) { - number.push(c); + // Dispatch based on the given character. + fn consume_ascii_character(&mut self, c: char) -> TokenKind { + let token = match c { + c if is_ascii_identifier_start(c) => self.lex_identifier(c), + '0'..='9' => self.lex_number(c), + '#' => return self.lex_comment(), + '\'' | '"' => self.lex_string(c), + '=' => { + if self.cursor.eat_char('=') { + TokenKind::EqEqual + } else { + self.state = State::AfterEqual; + return TokenKind::Equal; + } } - // Number that contains `_` separators. Remove them from the parsed text. - else if self.cursor.first() == '_' && radix.is_digit(self.cursor.second()) { - // Skip over `_` - self.cursor.bump(); - number.skip_char(); - } else { - break; + '+' => { + if self.cursor.eat_char('=') { + TokenKind::PlusEqual + } else { + TokenKind::Plus + } + } + '*' => { + if self.cursor.eat_char('=') { + TokenKind::StarEqual + } else if self.cursor.eat_char('*') { + if self.cursor.eat_char('=') { + TokenKind::DoubleStarEqual + } else { + TokenKind::DoubleStar + } + } else { + TokenKind::Star + } } - } - } - /// Lex a single comment. - fn lex_comment(&mut self) -> TokenKind { - #[cfg(debug_assertions)] - debug_assert_eq!(self.cursor.previous(), '#'); + c @ ('%' | '!') + if self.mode == Mode::Ipython + && self.state.is_after_equal() + && self.nesting == 0 => + { + // SAFETY: Safe because `c` has been matched against one of the possible escape command token + self.lex_ipython_escape_command(IpyEscapeKind::try_from(c).unwrap()) + } - let bytes = self.cursor.rest().as_bytes(); - let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len()); - self.cursor.skip_bytes(offset); + c @ ('%' | '!' | '?' | '/' | ';' | ',') + if self.mode == Mode::Ipython && self.state.is_new_logical_line() => + { + let kind = if let Ok(kind) = IpyEscapeKind::try_from([c, self.cursor.first()]) { + self.cursor.bump(); + kind + } else { + // SAFETY: Safe because `c` has been matched against one of the possible escape command token + IpyEscapeKind::try_from(c).unwrap() + }; - TokenKind::Comment - } + self.lex_ipython_escape_command(kind) + } - /// Lex a single IPython escape command. 
- fn lex_ipython_escape_command(&mut self, escape_kind: IpyEscapeKind) -> TokenKind { - let mut value = String::new(); + '?' if self.mode == Mode::Ipython => TokenKind::Question, - loop { - match self.cursor.first() { - '\\' => { - // Only skip the line continuation if it is followed by a newline - // otherwise it is a normal backslash which is part of the magic command: - // - // Skip this backslash - // v - // !pwd \ - // && ls -a | sed 's/^/\\ /' - // ^^ - // Don't skip these backslashes - if self.cursor.second() == '\r' { - self.cursor.bump(); - self.cursor.bump(); - self.cursor.eat_char('\n'); - continue; - } else if self.cursor.second() == '\n' { - self.cursor.bump(); - self.cursor.bump(); - continue; + '/' => { + if self.cursor.eat_char('=') { + TokenKind::SlashEqual + } else if self.cursor.eat_char('/') { + if self.cursor.eat_char('=') { + TokenKind::DoubleSlashEqual + } else { + TokenKind::DoubleSlash } - - self.cursor.bump(); - value.push('\\'); + } else { + TokenKind::Slash } - // Help end escape commands are those that end with 1 or 2 question marks. - // Here, we're only looking for a subset of help end escape commands which - // are the ones that has the escape token at the start of the line as well. - // On the other hand, we're not looking for help end escape commands that - // are strict in the sense that the escape token is only at the end. For example, - // - // * `%foo?` is recognized as a help end escape command but not as a strict one. - // * `foo?` is recognized as a strict help end escape command which is not - // lexed here but is identified at the parser level. - // - // Help end escape commands implemented in the IPython codebase using regex: - // https://github.com/ipython/ipython/blob/292e3a23459ca965b8c1bfe2c3707044c510209a/IPython/core/inputtransformer2.py#L454-L462 - '?' => { - self.cursor.bump(); - let mut question_count = 1u32; - while self.cursor.eat_char('?') { - question_count += 1; + } + '%' => { + if self.cursor.eat_char('=') { + TokenKind::PercentEqual + } else { + TokenKind::Percent + } + } + '|' => { + if self.cursor.eat_char('=') { + TokenKind::VbarEqual + } else { + TokenKind::Vbar + } + } + '^' => { + if self.cursor.eat_char('=') { + TokenKind::CircumflexEqual + } else { + TokenKind::CircumFlex + } + } + '&' => { + if self.cursor.eat_char('=') { + TokenKind::AmperEqual + } else { + TokenKind::Amper + } + } + '-' => { + if self.cursor.eat_char('=') { + TokenKind::MinusEqual + } else if self.cursor.eat_char('>') { + TokenKind::Rarrow + } else { + TokenKind::Minus + } + } + '@' => { + if self.cursor.eat_char('=') { + TokenKind::AtEqual + } else { + TokenKind::At + } + } + '!' 
=> { + if self.cursor.eat_char('=') { + TokenKind::NotEqual + } else { + TokenKind::Exclamation + } + } + '~' => TokenKind::Tilde, + '(' => { + self.nesting += 1; + TokenKind::Lpar + } + ')' => { + self.nesting = self.nesting.saturating_sub(1); + TokenKind::Rpar + } + '[' => { + self.nesting += 1; + TokenKind::Lsqb + } + ']' => { + self.nesting = self.nesting.saturating_sub(1); + TokenKind::Rsqb + } + '{' => { + self.nesting += 1; + TokenKind::Lbrace + } + '}' => { + if let Some(fstring) = self.fstrings.current_mut() { + if fstring.nesting() == self.nesting { + return self.push_error(LexicalError::new( + LexicalErrorType::FStringError(FStringErrorType::SingleRbrace), + self.token_range(), + )); } - - // The original implementation in the IPython codebase is based on regex which - // means that it's strict in the sense that it won't recognize a help end escape: - // * If there's any whitespace before the escape token (e.g. `%foo ?`) - // * If there are more than 2 question mark tokens (e.g. `%foo???`) - // which is what we're doing here as well. In that case, we'll continue with - // the prefixed escape token. - // - // Now, the whitespace and empty value check also makes sure that an empty - // command (e.g. `%?` or `? ??`, no value after/between the escape tokens) - // is not recognized as a help end escape command. So, `%?` and `? ??` are - // `IpyEscapeKind::Magic` and `IpyEscapeKind::Help` because of the initial `%` and `??` - // tokens. - if question_count > 2 - || value.chars().last().map_or(true, is_python_whitespace) - || !matches!(self.cursor.first(), '\n' | '\r' | EOF_CHAR) - { - // Not a help end escape command, so continue with the lexing. - value.reserve(question_count as usize); - for _ in 0..question_count { - value.push('?'); - } - continue; + fstring.try_end_format_spec(self.nesting); + } + self.nesting = self.nesting.saturating_sub(1); + TokenKind::Rbrace + } + ':' => { + if self + .fstrings + .current_mut() + .is_some_and(|fstring| fstring.try_start_format_spec(self.nesting)) + { + TokenKind::Colon + } else if self.cursor.eat_char('=') { + TokenKind::ColonEqual + } else { + TokenKind::Colon + } + } + ';' => TokenKind::Semi, + '<' => { + if self.cursor.eat_char('<') { + if self.cursor.eat_char('=') { + TokenKind::LeftShiftEqual + } else { + TokenKind::LeftShift + } + } else if self.cursor.eat_char('=') { + TokenKind::LessEqual + } else { + TokenKind::Less + } + } + '>' => { + if self.cursor.eat_char('>') { + if self.cursor.eat_char('=') { + TokenKind::RightShiftEqual + } else { + TokenKind::RightShift + } + } else if self.cursor.eat_char('=') { + TokenKind::GreaterEqual + } else { + TokenKind::Greater + } + } + ',' => TokenKind::Comma, + '.' => { + if self.cursor.first().is_ascii_digit() { + self.lex_decimal_number('.') + } else if self.cursor.eat_char2('.', '.') { + TokenKind::Ellipsis + } else { + TokenKind::Dot + } + } + '\n' => { + return if self.nesting == 0 && !self.state.is_new_logical_line() { + self.state = State::AfterNewline; + TokenKind::Newline + } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); } + TokenKind::NonLogicalNewline + } + } + '\r' => { + self.cursor.eat_char('\n'); - if escape_kind.is_help() { - // If we've recognize this as a help end escape command, then - // any question mark token / whitespaces at the start are not - // considered as part of the value. - // - // For example, `??foo?` is recognized as `IpyEscapeKind::Help` and - // `value` is `foo` instead of `??foo`. 
- value = value.trim_start_matches([' ', '?']).to_string(); - } else if escape_kind.is_magic() { - // Between `%` and `?` (at the end), the `?` takes priority - // over the `%` so `%foo?` is recognized as `IpyEscapeKind::Help` - // and `value` is `%foo` instead of `foo`. So, we need to - // insert the magic escape token at the start. - value.insert_str(0, escape_kind.as_str()); + return if self.nesting == 0 && !self.state.is_new_logical_line() { + self.state = State::AfterNewline; + TokenKind::Newline + } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); } + TokenKind::NonLogicalNewline + }; + } - let kind = match question_count { - 1 => IpyEscapeKind::Help, - 2 => IpyEscapeKind::Help2, - _ => unreachable!("`question_count` is always 1 or 2"), - }; + _ => { + self.state = State::Other; - self.current_value = TokenValue::IpyEscapeCommand { - kind, - value: value.into_boxed_str(), - }; + return self.push_error(LexicalError::new( + LexicalErrorType::UnrecognizedToken { tok: c }, + self.token_range(), + )); + } + }; - return TokenKind::IpyEscapeCommand; - } - '\n' | '\r' | EOF_CHAR => { - self.current_value = TokenValue::IpyEscapeCommand { - kind: escape_kind, - value: value.into_boxed_str(), - }; + self.state = State::Other; + + token + } + + /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix. + fn lex_identifier(&mut self, first: char) -> TokenKind { + // Detect potential string like rb'' b'' f'' u'' r'' + let quote = match (first, self.cursor.first()) { + (_, quote @ ('\'' | '"')) => self.try_single_char_prefix(first).then(|| { + self.cursor.bump(); + quote + }), + (_, second) if is_quote(self.cursor.second()) => { + self.try_double_char_prefix([first, second]).then(|| { + self.cursor.bump(); + // SAFETY: Safe because of the `is_quote` check in this match arm's guard + self.cursor.bump().unwrap() + }) + } + _ => None, + }; + + if let Some(quote) = quote { + if self.current_flags.is_f_string() { + return self.lex_fstring_start(quote); + } + + return self.lex_string(quote); + } + + // Keep track of whether the identifier is ASCII-only or not. + // + // This is important because Python applies NFKC normalization to + // identifiers: https://docs.python.org/3/reference/lexical_analysis.html#identifiers. + // We need to therefore do the same in our lexer, but applying NFKC normalization + // unconditionally is extremely expensive. If we know an identifier is ASCII-only, + // (by far the most common case), we can skip NFKC normalization of the identifier. 
+ let mut is_ascii = first.is_ascii(); + self.cursor + .eat_while(|c| is_identifier_continuation(c, &mut is_ascii)); + + let text = self.token_text(); + + if !is_ascii { + self.current_value = TokenValue::Name(text.nfkc().collect::().into_boxed_str()); + return TokenKind::Name; + } + + match text { + "False" => TokenKind::False, + "None" => TokenKind::None, + "True" => TokenKind::True, + "and" => TokenKind::And, + "as" => TokenKind::As, + "assert" => TokenKind::Assert, + "async" => TokenKind::Async, + "await" => TokenKind::Await, + "break" => TokenKind::Break, + "case" => TokenKind::Case, + "class" => TokenKind::Class, + "continue" => TokenKind::Continue, + "def" => TokenKind::Def, + "del" => TokenKind::Del, + "elif" => TokenKind::Elif, + "else" => TokenKind::Else, + "except" => TokenKind::Except, + "finally" => TokenKind::Finally, + "for" => TokenKind::For, + "from" => TokenKind::From, + "global" => TokenKind::Global, + "if" => TokenKind::If, + "import" => TokenKind::Import, + "in" => TokenKind::In, + "is" => TokenKind::Is, + "lambda" => TokenKind::Lambda, + "match" => TokenKind::Match, + "nonlocal" => TokenKind::Nonlocal, + "not" => TokenKind::Not, + "or" => TokenKind::Or, + "pass" => TokenKind::Pass, + "raise" => TokenKind::Raise, + "return" => TokenKind::Return, + "try" => TokenKind::Try, + "type" => TokenKind::Type, + "while" => TokenKind::While, + "with" => TokenKind::With, + "yield" => TokenKind::Yield, + _ => { + self.current_value = TokenValue::Name(text.to_string().into_boxed_str()); + TokenKind::Name + } + } + } - return TokenKind::IpyEscapeCommand; - } - c => { - self.cursor.bump(); - value.push(c); - } + /// Try lexing the single character string prefix, updating the token flags accordingly. + /// Returns `true` if it matches. + fn try_single_char_prefix(&mut self, first: char) -> bool { + match first { + 'f' | 'F' => self.current_flags |= TokenFlags::F_STRING, + 'u' | 'U' => self.current_flags |= TokenFlags::UNICODE_STRING, + 'b' | 'B' => self.current_flags |= TokenFlags::BYTE_STRING, + 'r' => self.current_flags |= TokenFlags::RAW_STRING_LOWERCASE, + 'R' => self.current_flags |= TokenFlags::RAW_STRING_UPPERCASE, + _ => return false, + } + true + } + + /// Try lexing the double character string prefix, updating the token flags accordingly. + /// Returns `true` if it matches. + fn try_double_char_prefix(&mut self, value: [char; 2]) -> bool { + match value { + ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => { + self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_LOWERCASE; + } + ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => { + self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_UPPERCASE; + } + ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => { + self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_LOWERCASE; + } + ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => { + self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_UPPERCASE; } + _ => return false, } + true } /// Lex a f-string start token. @@ -704,8 +870,8 @@ impl<'src> Lexer<'src> { }; self.current_value = TokenValue::FStringMiddle(value.into_boxed_str()); - self.current_flags = fstring.flags(); + self.current_flags = fstring.flags(); Some(TokenKind::FStringMiddle) } @@ -820,485 +986,322 @@ impl<'src> Lexer<'src> { TokenKind::String } - /// Lex the next token. 
- pub fn next_token(&mut self) -> TokenKind { - self.cursor.start_token(); - self.current_value = TokenValue::None; - self.current_flags = TokenFlags::empty(); - self.current_kind = self.lex_token(); - self.current_range = self.token_range(); - self.current_kind - } - - fn lex_token(&mut self) -> TokenKind { - if let Some(fstring) = self.fstrings.current() { - if !fstring.is_in_expression(self.nesting) { - if let Some(token) = self.lex_fstring_middle_or_end() { - if matches!(token, TokenKind::FStringEnd) { - self.fstrings.pop(); - } - return token; - } - } - } - // Return dedent tokens until the current indentation level matches the indentation of the next token. - else if let Some(indentation) = self.pending_indentation.take() { - match self.indentations.current().try_compare(indentation) { - Ok(Ordering::Greater) => { - self.pending_indentation = Some(indentation); - if self.indentations.dedent_one(indentation).is_err() { - return self.push_error(LexicalError::new( - LexicalErrorType::IndentationError, - self.token_range(), - )); - } - return TokenKind::Dedent; - } - Ok(_) => {} - Err(_) => { - return self.push_error(LexicalError::new( - LexicalErrorType::IndentationError, - self.token_range(), - )); - } - } - } - - if self.state.is_after_newline() { - if let Some(indentation) = self.eat_indentation() { - return indentation; - } - } else { - if let Err(error) = self.skip_whitespace() { - return self.push_error(error); - } - } - - // The lexer might've skipped whitespaces, so update the start offset - self.cursor.start_token(); - - if let Some(c) = self.cursor.bump() { - if c.is_ascii() { - self.consume_ascii_character(c) - } else if is_unicode_identifier_start(c) { - let identifier = self.lex_identifier(c); - self.state = State::Other; - - identifier + /// Numeric lexing. The feast can start! + fn lex_number(&mut self, first: char) -> TokenKind { + if first == '0' { + if self.cursor.eat_if(|c| matches!(c, 'x' | 'X')).is_some() { + self.lex_number_radix(Radix::Hex) + } else if self.cursor.eat_if(|c| matches!(c, 'o' | 'O')).is_some() { + self.lex_number_radix(Radix::Octal) + } else if self.cursor.eat_if(|c| matches!(c, 'b' | 'B')).is_some() { + self.lex_number_radix(Radix::Binary) } else { - self.push_error(LexicalError::new( - LexicalErrorType::UnrecognizedToken { tok: c }, - self.token_range(), - )) + self.lex_decimal_number(first) } } else { - // Reached the end of the file. Emit a trailing newline token if not at the beginning of a logical line, - // empty the dedent stack, and finally, return the EndOfFile token. 
- self.consume_end() - } - } - - fn skip_whitespace(&mut self) -> Result<(), LexicalError> { - loop { - match self.cursor.first() { - ' ' => { - self.cursor.bump(); - } - '\t' => { - self.cursor.bump(); - } - '\\' => { - self.cursor.bump(); - if self.cursor.eat_char('\r') { - self.cursor.eat_char('\n'); - } else if self.cursor.is_eof() { - return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range())); - } else if !self.cursor.eat_char('\n') { - return Err(LexicalError::new( - LexicalErrorType::LineContinuationError, - self.token_range(), - )); - } - } - // Form feed - '\x0C' => { - self.cursor.bump(); - } - _ => break, - } - } - - Ok(()) - } - - fn eat_indentation(&mut self) -> Option { - let mut indentation = Indentation::root(); - - loop { - match self.cursor.first() { - ' ' => { - self.cursor.bump(); - indentation = indentation.add_space(); - } - '\t' => { - self.cursor.bump(); - indentation = indentation.add_tab(); - } - '\\' => { - self.cursor.bump(); - if self.cursor.eat_char('\r') { - self.cursor.eat_char('\n'); - } else if self.cursor.is_eof() { - return Some(self.push_error(LexicalError::new( - LexicalErrorType::Eof, - self.token_range(), - ))); - } else if !self.cursor.eat_char('\n') { - return Some(self.push_error(LexicalError::new( - LexicalErrorType::LineContinuationError, - self.token_range(), - ))); - } - indentation = Indentation::root(); - } - // Form feed - '\x0C' => { - self.cursor.bump(); - indentation = Indentation::root(); - } - _ => break, - } - } - - // Handle indentation if this is a new, not all empty, logical line - if !matches!(self.cursor.first(), '\n' | '\r' | '#' | EOF_CHAR) { - self.state = State::NonEmptyLogicalLine; - - // Set to false so that we don't handle indentation on the next call. - return self.handle_indentation(indentation); + self.lex_decimal_number(first) } - - None } - fn handle_indentation(&mut self, indentation: Indentation) -> Option { - let token = match self.indentations.current().try_compare(indentation) { - // Dedent - Ok(Ordering::Greater) => { - self.pending_indentation = Some(indentation); - - if self.indentations.dedent_one(indentation).is_err() { - return Some(self.push_error(LexicalError::new( - LexicalErrorType::IndentationError, - self.token_range(), - ))); - }; - - // The lexer might've eaten some whitespaces to calculate the `indentation`. For - // example: - // - // ```py - // if first: - // if second: - // pass - // foo - // # ^ - // ``` - // - // Here, the cursor is at `^` and the `indentation` contains the whitespaces before - // the `pass` token. - self.cursor.start_token(); - - Some(TokenKind::Dedent) - } + /// Lex a hex/octal/decimal/binary number without a decimal point. + fn lex_number_radix(&mut self, radix: Radix) -> TokenKind { + #[cfg(debug_assertions)] + debug_assert!(matches!( + self.cursor.previous().to_ascii_lowercase(), + 'x' | 'o' | 'b' + )); - Ok(Ordering::Equal) => None, + // Lex the portion of the token after the base prefix (e.g., `9D5` in `0x9D5`). + let mut number = LexedText::new(self.offset(), self.source); + self.radix_run(&mut number, radix); - // Indent - Ok(Ordering::Less) => { - self.indentations.indent(indentation); - Some(TokenKind::Indent) - } - Err(_) => { - return Some(self.push_error(LexicalError::new( - LexicalErrorType::IndentationError, + // Extract the entire number, including the base prefix (e.g., `0x9D5`). 
+ let token = &self.source[self.token_range()]; + + let value = match Int::from_str_radix(number.as_str(), radix.as_u32(), token) { + Ok(int) => int, + Err(err) => { + return self.push_error(LexicalError::new( + LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()), self.token_range(), - ))); + )); } }; - - token + self.current_value = TokenValue::Int(value); + TokenKind::Int } - fn consume_end(&mut self) -> TokenKind { - // We reached end of file. - // First of all, we need all nestings to be finished. - if self.nesting > 0 { - // Reset the nesting to avoid going into infinite loop. - self.nesting = 0; - return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range())); - } + /// Lex a normal number, that is, no octal, hex or binary number. + fn lex_decimal_number(&mut self, first_digit_or_dot: char) -> TokenKind { + #[cfg(debug_assertions)] + debug_assert!(self.cursor.previous().is_ascii_digit() || self.cursor.previous() == '.'); + let start_is_zero = first_digit_or_dot == '0'; - // Next, insert a trailing newline, if required. - if !self.state.is_new_logical_line() { - self.state = State::AfterNewline; - TokenKind::Newline - } - // Next, flush the indentation stack to zero. - else if self.indentations.dedent().is_some() { - TokenKind::Dedent - } else { - TokenKind::EndOfFile - } - } + let mut number = LexedText::new(self.token_start(), self.source); + if first_digit_or_dot != '.' { + number.push(first_digit_or_dot); + self.radix_run(&mut number, Radix::Decimal); + }; - // Dispatch based on the given character. - fn consume_ascii_character(&mut self, c: char) -> TokenKind { - let token = match c { - c if is_ascii_identifier_start(c) => self.lex_identifier(c), - '0'..='9' => self.lex_number(c), - '#' => return self.lex_comment(), - '\'' | '"' => self.lex_string(c), - '=' => { - if self.cursor.eat_char('=') { - TokenKind::EqEqual - } else { - self.state = State::AfterEqual; - return TokenKind::Equal; - } - } - '+' => { - if self.cursor.eat_char('=') { - TokenKind::PlusEqual - } else { - TokenKind::Plus - } - } - '*' => { - if self.cursor.eat_char('=') { - TokenKind::StarEqual - } else if self.cursor.eat_char('*') { - if self.cursor.eat_char('=') { - TokenKind::DoubleStarEqual - } else { - TokenKind::DoubleStar - } - } else { - TokenKind::Star - } - } + let is_float = if first_digit_or_dot == '.' || self.cursor.eat_char('.') { + number.push('.'); - c @ ('%' | '!') - if self.mode == Mode::Ipython - && self.state.is_after_equal() - && self.nesting == 0 => - { - // SAFETY: Safe because `c` has been matched against one of the possible escape command token - self.lex_ipython_escape_command(IpyEscapeKind::try_from(c).unwrap()) + if self.cursor.eat_char('_') { + return self.push_error(LexicalError::new( + LexicalErrorType::OtherError("Invalid Syntax".to_string().into_boxed_str()), + TextRange::new(self.offset() - TextSize::new(1), self.offset()), + )); } - c @ ('%' | '!' | '?' | '/' | ';' | ',') - if self.mode == Mode::Ipython && self.state.is_new_logical_line() => - { - let kind = if let Ok(kind) = IpyEscapeKind::try_from([c, self.cursor.first()]) { - self.cursor.bump(); - kind - } else { - // SAFETY: Safe because `c` has been matched against one of the possible escape command token - IpyEscapeKind::try_from(c).unwrap() - }; - - self.lex_ipython_escape_command(kind) - } + self.radix_run(&mut number, Radix::Decimal); + true + } else { + // Normal number: + false + }; - '?' 
if self.mode == Mode::Ipython => TokenKind::Question, + let is_float = match self.cursor.rest().as_bytes() { + [b'e' | b'E', b'0'..=b'9', ..] | [b'e' | b'E', b'-' | b'+', b'0'..=b'9', ..] => { + // 'e' | 'E' + number.push(self.cursor.bump().unwrap()); - '/' => { - if self.cursor.eat_char('=') { - TokenKind::SlashEqual - } else if self.cursor.eat_char('/') { - if self.cursor.eat_char('=') { - TokenKind::DoubleSlashEqual - } else { - TokenKind::DoubleSlash - } - } else { - TokenKind::Slash - } - } - '%' => { - if self.cursor.eat_char('=') { - TokenKind::PercentEqual - } else { - TokenKind::Percent - } - } - '|' => { - if self.cursor.eat_char('=') { - TokenKind::VbarEqual - } else { - TokenKind::Vbar - } - } - '^' => { - if self.cursor.eat_char('=') { - TokenKind::CircumflexEqual - } else { - TokenKind::CircumFlex - } - } - '&' => { - if self.cursor.eat_char('=') { - TokenKind::AmperEqual - } else { - TokenKind::Amper - } - } - '-' => { - if self.cursor.eat_char('=') { - TokenKind::MinusEqual - } else if self.cursor.eat_char('>') { - TokenKind::Rarrow - } else { - TokenKind::Minus - } - } - '@' => { - if self.cursor.eat_char('=') { - TokenKind::AtEqual - } else { - TokenKind::At - } - } - '!' => { - if self.cursor.eat_char('=') { - TokenKind::NotEqual - } else { - TokenKind::Exclamation + if let Some(sign) = self.cursor.eat_if(|c| matches!(c, '+' | '-')) { + number.push(sign); } + + self.radix_run(&mut number, Radix::Decimal); + + true } - '~' => TokenKind::Tilde, - '(' => { - self.nesting += 1; - TokenKind::Lpar - } - ')' => { - self.nesting = self.nesting.saturating_sub(1); - TokenKind::Rpar - } - '[' => { - self.nesting += 1; - TokenKind::Lsqb - } - ']' => { - self.nesting = self.nesting.saturating_sub(1); - TokenKind::Rsqb - } - '{' => { - self.nesting += 1; - TokenKind::Lbrace + _ => is_float, + }; + + if is_float { + // Improvement: Use `Cow` instead of pushing to value text + let Ok(value) = f64::from_str(number.as_str()) else { + return self.push_error(LexicalError::new( + LexicalErrorType::OtherError( + "Invalid decimal literal".to_string().into_boxed_str(), + ), + self.token_range(), + )); + }; + + // Parse trailing 'j': + if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() { + self.current_value = TokenValue::Complex { + real: 0.0, + imag: value, + }; + TokenKind::Complex + } else { + self.current_value = TokenValue::Float(value); + TokenKind::Float } - '}' => { - if let Some(fstring) = self.fstrings.current_mut() { - if fstring.nesting() == self.nesting { + } else { + // Parse trailing 'j': + if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() { + let imag = f64::from_str(number.as_str()).unwrap(); + self.current_value = TokenValue::Complex { real: 0.0, imag }; + TokenKind::Complex + } else { + let value = match Int::from_str(number.as_str()) { + Ok(value) => { + if start_is_zero && value.as_u8() != Some(0) { + // Leading zeros in decimal integer literals are not permitted. 
+ return self.push_error(LexicalError::new( + LexicalErrorType::OtherError( + "Invalid decimal integer literal" + .to_string() + .into_boxed_str(), + ), + self.token_range(), + )); + } + value + } + Err(err) => { return self.push_error(LexicalError::new( - LexicalErrorType::FStringError(FStringErrorType::SingleRbrace), + LexicalErrorType::OtherError(format!("{err:?}").into_boxed_str()), self.token_range(), - )); + )) } - fstring.try_end_format_spec(self.nesting); - } - self.nesting = self.nesting.saturating_sub(1); - TokenKind::Rbrace + }; + self.current_value = TokenValue::Int(value); + TokenKind::Int } - ':' => { - if self - .fstrings - .current_mut() - .is_some_and(|fstring| fstring.try_start_format_spec(self.nesting)) - { - TokenKind::Colon - } else if self.cursor.eat_char('=') { - TokenKind::ColonEqual - } else { - TokenKind::Colon - } + } + } + + /// Consume a sequence of numbers with the given radix, + /// the digits can be decorated with underscores + /// like this: '`1_2_3_4`' == '1234' + fn radix_run(&mut self, number: &mut LexedText, radix: Radix) { + loop { + if let Some(c) = self.cursor.eat_if(|c| radix.is_digit(c)) { + number.push(c); } - ';' => TokenKind::Semi, - '<' => { - if self.cursor.eat_char('<') { - if self.cursor.eat_char('=') { - TokenKind::LeftShiftEqual - } else { - TokenKind::LeftShift - } - } else if self.cursor.eat_char('=') { - TokenKind::LessEqual - } else { - TokenKind::Less - } + // Number that contains `_` separators. Remove them from the parsed text. + else if self.cursor.first() == '_' && radix.is_digit(self.cursor.second()) { + // Skip over `_` + self.cursor.bump(); + number.skip_char(); + } else { + break; } - '>' => { - if self.cursor.eat_char('>') { - if self.cursor.eat_char('=') { - TokenKind::RightShiftEqual - } else { - TokenKind::RightShift + } + } + + /// Lex a single comment. + fn lex_comment(&mut self) -> TokenKind { + #[cfg(debug_assertions)] + debug_assert_eq!(self.cursor.previous(), '#'); + + let bytes = self.cursor.rest().as_bytes(); + let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len()); + self.cursor.skip_bytes(offset); + + TokenKind::Comment + } + + /// Lex a single IPython escape command. + fn lex_ipython_escape_command(&mut self, escape_kind: IpyEscapeKind) -> TokenKind { + let mut value = String::new(); + + loop { + match self.cursor.first() { + '\\' => { + // Only skip the line continuation if it is followed by a newline + // otherwise it is a normal backslash which is part of the magic command: + // + // Skip this backslash + // v + // !pwd \ + // && ls -a | sed 's/^/\\ /' + // ^^ + // Don't skip these backslashes + if self.cursor.second() == '\r' { + self.cursor.bump(); + self.cursor.bump(); + self.cursor.eat_char('\n'); + continue; + } else if self.cursor.second() == '\n' { + self.cursor.bump(); + self.cursor.bump(); + continue; } - } else if self.cursor.eat_char('=') { - TokenKind::GreaterEqual - } else { - TokenKind::Greater - } - } - ',' => TokenKind::Comma, - '.' => { - if self.cursor.first().is_ascii_digit() { - self.lex_decimal_number('.') - } else if self.cursor.eat_char2('.', '.') { - TokenKind::Ellipsis - } else { - TokenKind::Dot + + self.cursor.bump(); + value.push('\\'); } - } - '\n' => { - return if self.nesting == 0 && !self.state.is_new_logical_line() { - self.state = State::AfterNewline; - TokenKind::Newline - } else { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.try_end_format_spec(self.nesting); + // Help end escape commands are those that end with 1 or 2 question marks. 
+ // Here, we're only looking for a subset of help end escape commands which + // are the ones that has the escape token at the start of the line as well. + // On the other hand, we're not looking for help end escape commands that + // are strict in the sense that the escape token is only at the end. For example, + // + // * `%foo?` is recognized as a help end escape command but not as a strict one. + // * `foo?` is recognized as a strict help end escape command which is not + // lexed here but is identified at the parser level. + // + // Help end escape commands implemented in the IPython codebase using regex: + // https://github.com/ipython/ipython/blob/292e3a23459ca965b8c1bfe2c3707044c510209a/IPython/core/inputtransformer2.py#L454-L462 + '?' => { + self.cursor.bump(); + let mut question_count = 1u32; + while self.cursor.eat_char('?') { + question_count += 1; } - TokenKind::NonLogicalNewline - } - } - '\r' => { - self.cursor.eat_char('\n'); - return if self.nesting == 0 && !self.state.is_new_logical_line() { - self.state = State::AfterNewline; - TokenKind::Newline - } else { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.try_end_format_spec(self.nesting); + // The original implementation in the IPython codebase is based on regex which + // means that it's strict in the sense that it won't recognize a help end escape: + // * If there's any whitespace before the escape token (e.g. `%foo ?`) + // * If there are more than 2 question mark tokens (e.g. `%foo???`) + // which is what we're doing here as well. In that case, we'll continue with + // the prefixed escape token. + // + // Now, the whitespace and empty value check also makes sure that an empty + // command (e.g. `%?` or `? ??`, no value after/between the escape tokens) + // is not recognized as a help end escape command. So, `%?` and `? ??` are + // `IpyEscapeKind::Magic` and `IpyEscapeKind::Help` because of the initial `%` and `??` + // tokens. + if question_count > 2 + || value.chars().last().map_or(true, is_python_whitespace) + || !matches!(self.cursor.first(), '\n' | '\r' | EOF_CHAR) + { + // Not a help end escape command, so continue with the lexing. + value.reserve(question_count as usize); + for _ in 0..question_count { + value.push('?'); + } + continue; } - TokenKind::NonLogicalNewline - }; - } - _ => { - self.state = State::Other; + if escape_kind.is_help() { + // If we've recognize this as a help end escape command, then + // any question mark token / whitespaces at the start are not + // considered as part of the value. + // + // For example, `??foo?` is recognized as `IpyEscapeKind::Help` and + // `value` is `foo` instead of `??foo`. + value = value.trim_start_matches([' ', '?']).to_string(); + } else if escape_kind.is_magic() { + // Between `%` and `?` (at the end), the `?` takes priority + // over the `%` so `%foo?` is recognized as `IpyEscapeKind::Help` + // and `value` is `%foo` instead of `foo`. So, we need to + // insert the magic escape token at the start. 
+ value.insert_str(0, escape_kind.as_str());
+ }
- return self.push_error(LexicalError::new(
- LexicalErrorType::UnrecognizedToken { tok: c },
- self.token_range(),
- ));
+ let kind = match question_count {
+ 1 => IpyEscapeKind::Help,
+ 2 => IpyEscapeKind::Help2,
+ _ => unreachable!("`question_count` is always 1 or 2"),
+ };
+
+ self.current_value = TokenValue::IpyEscapeCommand {
+ kind,
+ value: value.into_boxed_str(),
+ };
+
+ return TokenKind::IpyEscapeCommand;
+ }
+ '\n' | '\r' | EOF_CHAR => {
+ self.current_value = TokenValue::IpyEscapeCommand {
+ kind: escape_kind,
+ value: value.into_boxed_str(),
+ };
+
+ return TokenKind::IpyEscapeCommand;
+ }
+ c => {
+ self.cursor.bump();
+ value.push(c);
+ }
 }
- };
+ }
+ }

- self.state = State::Other;
+ fn consume_end(&mut self) -> TokenKind {
+ // We reached end of file.
+ // First of all, we need all nestings to be finished.
+ if self.nesting > 0 {
+ // Reset the nesting to avoid going into infinite loop.
+ self.nesting = 0;
+ return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range()));
+ }

- token
+ // Next, insert a trailing newline, if required.
+ if !self.state.is_new_logical_line() {
+ self.state = State::AfterNewline;
+ TokenKind::Newline
+ }
+ // Next, flush the indentation stack to zero.
+ else if self.indentations.dedent().is_some() {
+ TokenKind::Dedent
+ } else {
+ TokenKind::EndOfFile
+ }
 }

 #[inline]
@@ -1327,13 +1330,10 @@ impl<'src> Lexer<'src> {
 self.token_range().start()
 }

- /// Takes the token value corresponding to the current token out of the lexer, replacing it
- /// with the default value.
- ///
- /// All the subsequent call to this method without moving the lexer would always return the
- /// default value which is [`TokenValue::None`].
- pub(crate) fn take_value(&mut self) -> TokenValue {
- std::mem::take(&mut self.current_value)
+ /// Helper function to push the given error and return the [`TokenKind::Unknown`] token.
+ fn push_error(&mut self, error: LexicalError) -> TokenKind {
+ self.errors.push(error);
+ TokenKind::Unknown
 }

 /// Creates a checkpoint to which the lexer can later return to using [`Self::rewind`].

From f4e23d2dffb9ed798a8e98d996b786ee1def659b Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala
Date: Mon, 3 Jun 2024 18:34:03 +0530
Subject: [PATCH 22/25] Use string expression for parsing type annotation (#11717)

## Summary

This PR updates the logic for parsing type annotations to accept an
`ExprStringLiteral` node instead of the string value and the range.
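To make the new shape concrete, here is a minimal sketch of a call site after
this change (the import paths and the `visit_string_annotation` helper are
illustrative assumptions for this sketch, not code from this diff):

```rust
use ruff_python_ast::ExprStringLiteral;
use ruff_python_parser::typing::{parse_type_annotation, AnnotationKind};

/// A caller now hands over the string expression node itself; the function
/// derives the range and the raw contents from the node.
fn visit_string_annotation(string_expr: &ExprStringLiteral, source: &str) {
    if let Ok((parsed, kind)) = parse_type_annotation(string_expr, source) {
        match kind {
            // Offsets point into the real source, so autofixes remain valid.
            AnnotationKind::Simple => {}
            // The expression was re-parsed and relocated; skip offset-sensitive fixes.
            AnnotationKind::Complex => {}
        }
        let _ = parsed; // visit `parsed` as a type expression here
    }
}
```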
The main motivation for this change is to simplify the implementation of the
`parse_type_annotation` function by:

* Using the `opener_len` and `closer_len` from the string flags to get the raw
  contents range, instead of extracting it via:
  * `str::leading_quote(expression).unwrap().text_len()`
  * `str::trailing_quote(expression).unwrap().text_len()`
* Skipping the string content comparison when we already know that the string
  is implicitly concatenated

## Test Plan

`cargo insta test`

---
 .../ruff_linter/src/checkers/ast/deferred.rs | 5 +-
 crates/ruff_linter/src/checkers/ast/mod.rs | 41 ++++++-----
 .../flake8_annotations/rules/definition.rs | 4 +-
 .../src/rules/ruff/rules/implicit_optional.rs | 8 +--
 crates/ruff_linter/src/rules/ruff/typing.rs | 4 +-
 crates/ruff_python_parser/src/typing.rs | 72 ++++++++++++-------
 6 files changed, 78 insertions(+), 56 deletions(-)

diff --git a/crates/ruff_linter/src/checkers/ast/deferred.rs b/crates/ruff_linter/src/checkers/ast/deferred.rs
index 7f390e7afd577..01043e77d4505 100644
--- a/crates/ruff_linter/src/checkers/ast/deferred.rs
+++ b/crates/ruff_linter/src/checkers/ast/deferred.rs
@@ -1,13 +1,12 @@
-use ruff_python_ast::Expr;
+use ruff_python_ast::{Expr, ExprStringLiteral};
 use ruff_python_semantic::{ScopeId, Snapshot};
-use ruff_text_size::TextRange;

 /// A collection of AST nodes that are deferred for later visitation. Used to, e.g., store
 /// functions, whose bodies shouldn't be visited until all module-level definitions have been
 /// visited.
 #[derive(Debug, Default)]
 pub(crate) struct Visit<'a> {
- pub(crate) string_type_definitions: Vec<(TextRange, &'a str, Snapshot)>,
+ pub(crate) string_type_definitions: Vec<(&'a ExprStringLiteral, Snapshot)>,
 pub(crate) future_type_definitions: Vec<(&'a Expr, Snapshot)>,
 pub(crate) type_param_definitions: Vec<(&'a Expr, Snapshot)>,
 pub(crate) functions: Vec<Snapshot>,
diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs
index 5f26244df7fff..73b3d607ed878 100644
--- a/crates/ruff_linter/src/checkers/ast/mod.rs
+++ b/crates/ruff_linter/src/checkers/ast/mod.rs
@@ -1011,12 +1011,10 @@ impl<'a> Visitor<'a> for Checker<'a> {
 && self.semantic.future_annotations_or_stub()
 && (self.semantic.in_annotation() || self.source_type.is_stub())
 {
- if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = expr {
- self.visit.string_type_definitions.push((
- expr.range(),
- value.to_str(),
- self.semantic.snapshot(),
- ));
+ if let Expr::StringLiteral(string_literal) = expr {
+ self.visit
+ .string_type_definitions
+ .push((string_literal, self.semantic.snapshot()));
 } else {
 self.visit
 .future_type_definitions
@@ -1426,13 +1424,11 @@ impl<'a> Visitor<'a> for Checker<'a> {
 }
 }
 }
- Expr::StringLiteral(ast::ExprStringLiteral { value, ..
}) => { + Expr::StringLiteral(string_literal) => { if self.semantic.in_type_definition() && !self.semantic.in_typing_literal() { - self.visit.string_type_definitions.push(( - expr.range(), - value.to_str(), - self.semantic.snapshot(), - )); + self.visit + .string_type_definitions + .push((string_literal, self.semantic.snapshot())); } } Expr::FString(_) => { @@ -2156,22 +2152,25 @@ impl<'a> Checker<'a> { let snapshot = self.semantic.snapshot(); while !self.visit.string_type_definitions.is_empty() { let type_definitions = std::mem::take(&mut self.visit.string_type_definitions); - for (range, value, snapshot) in type_definitions { - if let Ok((expr, kind)) = - parse_type_annotation(value, range, self.locator.contents()) + for (string_expr, snapshot) in type_definitions { + if let Ok((parsed_annotation, kind)) = + parse_type_annotation(string_expr, self.locator.contents()) { - let expr = allocator.alloc(expr); + let parsed_annotation = allocator.alloc(parsed_annotation); + + let annotation = string_expr.value.to_str(); + let range = string_expr.range(); self.semantic.restore(snapshot); if self.semantic.in_annotation() && self.semantic.in_typing_only_annotation() { if self.enabled(Rule::QuotedAnnotation) { - pyupgrade::rules::quoted_annotation(self, value, range); + pyupgrade::rules::quoted_annotation(self, annotation, range); } } if self.source_type.is_stub() { if self.enabled(Rule::QuotedAnnotationInStub) { - flake8_pyi::rules::quoted_annotation_in_stub(self, value, range); + flake8_pyi::rules::quoted_annotation_in_stub(self, annotation, range); } } @@ -2184,14 +2183,14 @@ impl<'a> Checker<'a> { self.semantic.flags |= SemanticModelFlags::TYPE_DEFINITION | type_definition_flag; - self.visit_expr(expr); + self.visit_expr(parsed_annotation); } else { if self.enabled(Rule::ForwardAnnotationSyntaxError) { self.diagnostics.push(Diagnostic::new( pyflakes::rules::ForwardAnnotationSyntaxError { - body: value.to_string(), + body: string_expr.value.to_string(), }, - range, + string_expr.range(), )); } } diff --git a/crates/ruff_linter/src/rules/flake8_annotations/rules/definition.rs b/crates/ruff_linter/src/rules/flake8_annotations/rules/definition.rs index 25b0119a656af..ad9e5706624a6 100644 --- a/crates/ruff_linter/src/rules/flake8_annotations/rules/definition.rs +++ b/crates/ruff_linter/src/rules/flake8_annotations/rules/definition.rs @@ -512,10 +512,10 @@ fn check_dynamically_typed( ) where F: FnOnce() -> String, { - if let Expr::StringLiteral(ast::ExprStringLiteral { range, value }) = annotation { + if let Expr::StringLiteral(string_expr) = annotation { // Quoted annotations if let Ok((parsed_annotation, _)) = - parse_type_annotation(value.to_str(), *range, checker.locator().contents()) + parse_type_annotation(string_expr, checker.locator().contents()) { if type_hint_resolves_to_any( &parsed_annotation, diff --git a/crates/ruff_linter/src/rules/ruff/rules/implicit_optional.rs b/crates/ruff_linter/src/rules/ruff/rules/implicit_optional.rs index 180409066f05a..b2012a0111123 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/implicit_optional.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/implicit_optional.rs @@ -177,13 +177,13 @@ pub(crate) fn implicit_optional(checker: &mut Checker, parameters: &Parameters) continue; }; - if let Expr::StringLiteral(ast::ExprStringLiteral { range, value }) = annotation.as_ref() { + if let Expr::StringLiteral(string_expr) = annotation.as_ref() { // Quoted annotation. 
- if let Ok((annotation, kind)) = - parse_type_annotation(value.to_str(), *range, checker.locator().contents()) + if let Ok((parsed_annotation, kind)) = + parse_type_annotation(string_expr, checker.locator().contents()) { let Some(expr) = type_hint_explicitly_allows_none( - &annotation, + &parsed_annotation, checker.semantic(), checker.locator(), checker.settings.target_version.minor(), diff --git a/crates/ruff_linter/src/rules/ruff/typing.rs b/crates/ruff_linter/src/rules/ruff/typing.rs index 7668a18ebac06..f3422f29abce9 100644 --- a/crates/ruff_linter/src/rules/ruff/typing.rs +++ b/crates/ruff_linter/src/rules/ruff/typing.rs @@ -112,8 +112,8 @@ impl<'a> TypingTarget<'a> { .. }) => Some(TypingTarget::PEP604Union(left, right)), Expr::NoneLiteral(_) => Some(TypingTarget::None), - Expr::StringLiteral(ast::ExprStringLiteral { value, range }) => { - parse_type_annotation(value.to_str(), *range, locator.contents()) + Expr::StringLiteral(string_expr) => { + parse_type_annotation(string_expr, locator.contents()) .map_or(None, |(expr, _)| Some(TypingTarget::ForwardReference(expr))) } _ => semantic.resolve_qualified_name(expr).map_or( diff --git a/crates/ruff_python_parser/src/typing.rs b/crates/ruff_python_parser/src/typing.rs index 02ebf3243c0b3..a848d538dc4b1 100644 --- a/crates/ruff_python_parser/src/typing.rs +++ b/crates/ruff_python_parser/src/typing.rs @@ -3,8 +3,9 @@ use anyhow::Result; use ruff_python_ast::relocate::relocate_expr; -use ruff_python_ast::{str, Expr}; -use ruff_text_size::{TextLen, TextRange}; +use ruff_python_ast::str::raw_contents; +use ruff_python_ast::{Expr, ExprStringLiteral, StringFlags, StringLiteral}; +use ruff_text_size::Ranged; use crate::{parse_expression, parse_expression_range}; @@ -16,37 +17,60 @@ pub enum AnnotationKind { /// expressions within the annotation and apply automatic fixes, which is /// not possible for complex string literals. Simple, + /// The annotation is defined as part of a complex string literal, such as /// a literal containing an implicit concatenation or escaped characters, /// e.g. `x: "List" "[int]" = []`. These are comparatively rare, but valid. Complex, } -/// Parses the value of a string literal node (`parsed_contents`) with `range` as a type -/// annotation. The given `source` is the entire source code. +/// Parses the given string expression node as a type annotation. The given `source` is the entire +/// source code. pub fn parse_type_annotation( - parsed_contents: &str, - range: TextRange, + string_expr: &ExprStringLiteral, source: &str, ) -> Result<(Expr, AnnotationKind)> { - let expression = &source[range]; - - if str::raw_contents(expression).is_some_and(|raw_contents| raw_contents == parsed_contents) { - // The annotation is considered "simple" if and only if the raw representation (e.g., - // `List[int]` within "List[int]") exactly matches the parsed representation. This - // isn't the case, e.g., for implicit concatenations, or for annotations that contain - // escaped quotes. 
- let leading_quote_len = str::leading_quote(expression).unwrap().text_len(); - let trailing_quote_len = str::trailing_quote(expression).unwrap().text_len(); - let range = range - .add_start(leading_quote_len) - .sub_end(trailing_quote_len); - let expr = parse_expression_range(source, range)?.into_expr(); - Ok((expr, AnnotationKind::Simple)) + let expr_text = &source[string_expr.range()]; + + if let [string_literal] = string_expr.value.as_slice() { + // Compare the raw contents (without quotes) of the expression with the parsed contents + // contained in the string literal. + if raw_contents(expr_text) + .is_some_and(|raw_contents| raw_contents == string_literal.as_str()) + { + parse_simple_type_annotation(string_literal, source) + } else { + // The raw contents of the string doesn't match the parsed content. This could be the + // case for annotations that contain escaped quotes. + parse_complex_type_annotation(string_expr) + } } else { - // Otherwise, consider this a "complex" annotation. - let mut expr = parse_expression(parsed_contents)?.into_expr(); - relocate_expr(&mut expr, range); - Ok((expr, AnnotationKind::Complex)) + // String is implicitly concatenated. + parse_complex_type_annotation(string_expr) } } + +fn parse_simple_type_annotation( + string_literal: &StringLiteral, + source: &str, +) -> Result<(Expr, AnnotationKind)> { + Ok(( + parse_expression_range( + source, + string_literal + .range() + .add_start(string_literal.flags.opener_len()) + .sub_end(string_literal.flags.closer_len()), + )? + .into_expr(), + AnnotationKind::Simple, + )) +} + +fn parse_complex_type_annotation( + string_expr: &ExprStringLiteral, +) -> Result<(Expr, AnnotationKind)> { + let mut parsed = parse_expression(string_expr.value.to_str())?.into_expr(); + relocate_expr(&mut parsed, string_expr.range()); + Ok((parsed, AnnotationKind::Complex)) +} From a58bde695855fa66d564c6ed68c280603613f5a7 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 3 Jun 2024 18:38:24 +0530 Subject: [PATCH 23/25] Remove less used parser dependencies (#11718) ## Summary This PR removes the following dependencies from the `ruff_python_parser` crate: * `anyhow` (moved to dev dependencies) * `is-macro` * `itertools` The main motivation is that they aren't used much. Additionally, it updates the return type of `parse_type_annotation` to use a more specific `ParseError` instead of the generic `anyhow::Error`. 
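As a sketch of what the concrete error type enables at call sites, the parse
error can now be matched on directly instead of downcasting a type-erased
`anyhow::Error` (the `error`/`location` field names on `ParseError` are an
assumption in this sketch, not taken from this diff):

```rust
use ruff_python_ast::ExprStringLiteral;
use ruff_python_parser::{typing::parse_type_annotation, ParseError};

/// With `Result<_, ParseError>`, the caller gets the syntax error and its
/// location without any downcasting.
fn check_annotation(string_expr: &ExprStringLiteral, source: &str) {
    if let Err(ParseError { error, location }) = parse_type_annotation(string_expr, source) {
        eprintln!("invalid forward annotation: {error} at {location:?}");
    }
}
```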
## Test Plan `cargo insta test` --- Cargo.lock | 2 -- crates/ruff_python_parser/Cargo.toml | 4 +--- crates/ruff_python_parser/src/lexer.rs | 9 +++++---- crates/ruff_python_parser/src/lib.rs | 6 ++---- crates/ruff_python_parser/src/typing.rs | 19 ++++++++++++------- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f65270364507b..f6c6f3ed1d103 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2249,8 +2249,6 @@ dependencies = [ "bitflags 2.5.0", "bstr", "insta", - "is-macro", - "itertools 0.13.0", "memchr", "ruff_python_ast", "ruff_python_trivia", diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml index 00ac193efedf0..834baac8532f8 100644 --- a/crates/ruff_python_parser/Cargo.toml +++ b/crates/ruff_python_parser/Cargo.toml @@ -17,11 +17,8 @@ ruff_python_ast = { workspace = true } ruff_python_trivia = { workspace = true } ruff_text_size = { workspace = true } -anyhow = { workspace = true } bitflags = { workspace = true } bstr = { workspace = true } -is-macro = { workspace = true } -itertools = { workspace = true } memchr = { workspace = true } rustc-hash = { workspace = true } static_assertions = { workspace = true } @@ -33,6 +30,7 @@ unicode-normalization = { workspace = true } ruff_source_file = { workspace = true } annotate-snippets = { workspace = true } +anyhow = { workspace = true } insta = { workspace = true, features = ["glob"] } walkdir = { workspace = true } diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 5b5bb3d213f0a..41724a13dd178 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -6,16 +6,17 @@ //! //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html -use std::{char, cmp::Ordering, str::FromStr}; +use std::cmp::Ordering; +use std::str::FromStr; use bitflags::bitflags; +use unicode_ident::{is_xid_continue, is_xid_start}; +use unicode_normalization::UnicodeNormalization; + use ruff_python_ast::str::Quote; use ruff_python_ast::str_prefix::{ AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, }; -use unicode_ident::{is_xid_continue, is_xid_start}; -use unicode_normalization::UnicodeNormalization; - use ruff_python_ast::{AnyStringFlags, Int, IpyEscapeKind, StringFlags}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index 52b436592b92c..a0c480cef63d2 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -73,7 +73,6 @@ pub use crate::token::TokenKind; use crate::parser::Parser; -use itertools::Itertools; use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite}; use ruff_python_trivia::CommentRanges; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -388,9 +387,8 @@ impl Tokens { let end = *self.first_unknown_or_len.get_or_init(|| { self.raw .iter() - .find_position(|token| token.kind() == TokenKind::Unknown) - .map(|(idx, _)| idx) - .unwrap_or_else(|| self.raw.len()) + .position(|token| token.kind() == TokenKind::Unknown) + .unwrap_or(self.raw.len()) }); &self.raw[..end] } diff --git a/crates/ruff_python_parser/src/typing.rs b/crates/ruff_python_parser/src/typing.rs index a848d538dc4b1..4047e79f9cc69 100644 --- a/crates/ruff_python_parser/src/typing.rs +++ b/crates/ruff_python_parser/src/typing.rs @@ -1,15 +1,13 @@ //! This module takes care of parsing a type annotation. 
-use anyhow::Result;
-
 use ruff_python_ast::relocate::relocate_expr;
 use ruff_python_ast::str::raw_contents;
 use ruff_python_ast::{Expr, ExprStringLiteral, StringFlags, StringLiteral};
 use ruff_text_size::Ranged;

-use crate::{parse_expression, parse_expression_range};
+use crate::{parse_expression, parse_expression_range, ParseError};

-#[derive(is_macro::Is, Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug)]
 pub enum AnnotationKind {
 /// The annotation is defined as part a simple string literal,
 /// e.g. `x: "List[int]" = []`. Annotations within simple literals
@@ -24,12 +22,19 @@ pub enum AnnotationKind {
 Complex,
 }

+impl AnnotationKind {
+ /// Returns `true` if the annotation kind is simple.
+ pub const fn is_simple(self) -> bool {
+ matches!(self, AnnotationKind::Simple)
+ }
+}
+
 /// Parses the given string expression node as a type annotation. The given `source` is the entire
 /// source code.
 pub fn parse_type_annotation(
 string_expr: &ExprStringLiteral,
 source: &str,
-) -> Result<(Expr, AnnotationKind)> {
+) -> Result<(Expr, AnnotationKind), ParseError> {
 let expr_text = &source[string_expr.range()];

 if let [string_literal] = string_expr.value.as_slice() {
@@ -53,7 +58,7 @@ pub fn parse_type_annotation(
 fn parse_simple_type_annotation(
 string_literal: &StringLiteral,
 source: &str,
-) -> Result<(Expr, AnnotationKind)> {
+) -> Result<(Expr, AnnotationKind), ParseError> {
 Ok((
 parse_expression_range(
 source,
@@ -69,7 +74,7 @@ fn parse_simple_type_annotation(

 fn parse_complex_type_annotation(
 string_expr: &ExprStringLiteral,
-) -> Result<(Expr, AnnotationKind)> {
+) -> Result<(Expr, AnnotationKind), ParseError> {
 let mut parsed = parse_expression(string_expr.value.to_str())?.into_expr();
 relocate_expr(&mut parsed, string_expr.range());
 Ok((parsed, AnnotationKind::Complex))

From 8db147c09d4a3c0621077858f3c6154bf8215461 Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala
Date: Mon, 3 Jun 2024 18:44:21 +0530
Subject: [PATCH 24/25] Generator should add a newline before type statement (#11720)

## Summary

This PR fixes a bug where the `Generator` wouldn't add a newline before a
type alias statement. This is because it wasn't using the `statement` macro,
which takes care of the newline. Without this fix, given code like:

```py
type X = int
type Y = str
```

the generator would produce:

```py
type X = inttype Y = str
```

## Test Plan

Add a test case.
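For context, the fix can be exercised with a round-trip check along these
lines (assuming the crate's public `round_trip` helper; its exact signature is
an assumption in this sketch):

```rust
use ruff_python_codegen::round_trip;

// Two consecutive type aliases must survive a parse/generate round trip.
// Before this fix, the generator fused them into `type X = inttype Y = str`.
fn main() {
    let source = "type X = int\ntype Y = str";
    assert_eq!(round_trip(source).unwrap(), source);
}
```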
--- crates/ruff_python_codegen/src/generator.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/crates/ruff_python_codegen/src/generator.rs b/crates/ruff_python_codegen/src/generator.rs index 9cb98dd7c6174..01ff10a9ab42e 100644 --- a/crates/ruff_python_codegen/src/generator.rs +++ b/crates/ruff_python_codegen/src/generator.rs @@ -482,13 +482,15 @@ impl<'a> Generator<'a> { type_params, value, }) => { - self.p("type "); - self.unparse_expr(name, precedence::MAX); - if let Some(type_params) = type_params { - self.unparse_type_params(type_params); - } - self.p(" = "); - self.unparse_expr(value, precedence::ASSIGN); + statement!({ + self.p("type "); + self.unparse_expr(name, precedence::MAX); + if let Some(type_params) = type_params { + self.unparse_type_params(type_params); + } + self.p(" = "); + self.unparse_expr(value, precedence::ASSIGN); + }); } Stmt::Raise(ast::StmtRaise { exc, @@ -1634,6 +1636,10 @@ except* Exception as e: return 2 case 4 as y: return y" + ); + assert_round_trip!( + r"type X = int +type Y = str" ); assert_eq!(round_trip(r"x = (1, 2, 3)"), r"x = 1, 2, 3"); assert_eq!(round_trip(r"-(1) + ~(2) + +(3)"), r"-1 + ~2 + +3"); From 2b28889ca9d7935488875b5c944a159a2db20a23 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 3 Jun 2024 18:50:55 +0530 Subject: [PATCH 25/25] Isolate non-breaking whitespace indentation test case (#11721) As discussed in Discord, this moves the test case for non-breaking whitespace into its own method. --- crates/ruff_python_codegen/src/stylist.rs | 25 +++++++++++++---------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/crates/ruff_python_codegen/src/stylist.rs b/crates/ruff_python_codegen/src/stylist.rs index 375f0c8e16e92..d5a1ea53cd110 100644 --- a/crates/ruff_python_codegen/src/stylist.rs +++ b/crates/ruff_python_codegen/src/stylist.rs @@ -214,6 +214,20 @@ x = ( let stylist = Stylist::from_tokens(parsed.tokens(), &locator); assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); + // formfeed indent, see `detect_indention` comment. + let contents = r" +class FormFeedIndent: + def __init__(self, a=[]): + print(a) +"; + let locator = Locator::new(contents); + let parsed = parse_module(contents).unwrap(); + let stylist = Stylist::from_tokens(parsed.tokens(), &locator); + assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); + } + + #[test] + fn indent_non_breaking_whitespace() { let contents = r" x = (  1, @@ -227,17 +241,6 @@ x = ( Stylist::from_tokens(parsed.tokens(), &locator).indentation(), &Indentation(" ".to_string()) ); - - // formfeed indent, see `detect_indention` comment. - let contents = r" -class FormFeedIndent: - def __init__(self, a=[]): - print(a) -"; - let locator = Locator::new(contents); - let parsed = parse_module(contents).unwrap(); - let stylist = Stylist::from_tokens(parsed.tokens(), &locator); - assert_eq!(stylist.indentation(), &Indentation(" ".to_string())); } #[test]