Skip to content

Commit

Permalink
black/parser: partial support for pattern matching (#2586)
Browse files Browse the repository at this point in the history
Partial implementation for #2242. Only works when explicitly stated -t py310.

Co-authored-by: Richard Si <63936253+ichard26@users.noreply.github.com>
  • Loading branch information
2 people authored and JelleZijlstra committed Nov 16, 2021
1 parent 2e1b951 commit 3c2ea8a
Show file tree
Hide file tree
Showing 14 changed files with 553 additions and 22 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

- Warn about Python 2 deprecation in more cases by improving Python 2 only syntax
detection (#2592)
- Add partial support for the match statement. As it's experimental, it's only enabled
when `--target-version py310` is explicitly specified (#2586)
- Add support for parenthesized with (#2586)

## 21.10b0

Expand Down
6 changes: 5 additions & 1 deletion src/black/linegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def visit_stmt(
"""Visit a statement.
This implementation is shared for `if`, `while`, `for`, `try`, `except`,
`def`, `with`, `class`, `assert` and assignments.
`def`, `with`, `class`, `assert`, `match`, `case` and assignments.
The relevant Python language `keywords` for a given statement will be
NAME leaves within it. This methods puts those on a separate line.
Expand Down Expand Up @@ -292,6 +292,10 @@ def __post_init__(self) -> None:
self.visit_async_funcdef = self.visit_async_stmt
self.visit_decorated = self.visit_decorators

# PEP 634
self.visit_match_stmt = partial(v, keywords={"match"}, parens=Ø)
self.visit_case_block = partial(v, keywords={"case"}, parens=Ø)


def transform_line(
line: Line, mode: Mode, features: Collection[Feature] = ()
Expand Down
5 changes: 5 additions & 0 deletions src/black/mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class TargetVersion(Enum):
PY37 = 7
PY38 = 8
PY39 = 9
PY310 = 10

def is_python2(self) -> bool:
return self is TargetVersion.PY27
Expand All @@ -39,6 +40,7 @@ class Feature(Enum):
ASSIGNMENT_EXPRESSIONS = 8
POS_ONLY_ARGUMENTS = 9
RELAXED_DECORATORS = 10
PATTERN_MATCHING = 11
FORCE_OPTIONAL_PARENTHESES = 50

# temporary for Python 2 deprecation
Expand Down Expand Up @@ -108,6 +110,9 @@ class Feature(Enum):
Feature.RELAXED_DECORATORS,
Feature.POS_ONLY_ARGUMENTS,
},
TargetVersion.PY310: {
Feature.PATTERN_MATCHING,
},
}


Expand Down
3 changes: 3 additions & 0 deletions src/black/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:

# Python 3-compatible code, so only try Python 3 grammar.
grammars = []
if supports_feature(target_versions, Feature.PATTERN_MATCHING):
# Python 3.10+
grammars.append(pygram.python_grammar_soft_keywords)
# If we have to parse both, try to parse async as a keyword first
if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
# Python 3.7+
Expand Down
41 changes: 36 additions & 5 deletions src/blib2to3/Grammar.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt
async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
Expand All @@ -115,9 +115,8 @@ try_stmt: ('try' ':' suite
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
with_var: 'as' expr
with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite

# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
Expand All @@ -131,7 +130,15 @@ testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test

namedexpr_test: test [':=' test]
namedexpr_test: asexpr_test [':=' asexpr_test]

# This is actually not a real rule, though since the parser is very
# limited in terms of the strategy about match/case rules, we are inserting
# a virtual case (<expr> as <expr>) as a valid expression. Unless a better
# approach is thought of, the only side effect of this seems to be allowing
# more stuff to be parsed (which would then fail on the ast).
asexpr_test: test ['as' test]

test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
Expand Down Expand Up @@ -213,3 +220,27 @@ encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr


# 3.10 match statement definition

# PS: normally the grammar is much much more restricted, but
# at this moment for not trying to bother much with encoding the
# exact same DSL in a LL(1) parser, we will just accept an expression
# and let the ast.parse() step of the safe mode to reject invalid
# grammar.

# The reason why it is more restricted is that, patterns are some
# sort of a DSL (more advanced than our LHS on assignments, but
# still in a very limited python subset). They are not really
# expressions, but who cares. If we can parse them, that is enough
# to reformat them.

match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT
subject_expr: namedexpr_test

# cases
case_block: "case" patterns [guard] ':' suite
guard: 'if' namedexpr_test
patterns: pattern ['as' pattern]
pattern: (expr|star_expr) (',' (expr|star_expr))* [',']
81 changes: 79 additions & 2 deletions src/blib2to3/pgen2/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,92 @@
List,
Optional,
Text,
Iterator,
Tuple,
TypeVar,
Generic,
Union,
)
from dataclasses import dataclass, field

# Pgen imports
from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import _Convert, NL
from blib2to3.pgen2.grammar import Grammar
from contextlib import contextmanager

Path = Union[str, "os.PathLike[str]"]


@dataclass
class ReleaseRange:
    """A window of look-ahead tokens recorded by ``TokenProxy``.

    ``start`` is the proxy's token counter at the moment the window was
    opened, ``tokens`` accumulates whatever was read ahead from that
    position, and ``end`` stays ``None`` until ``lock()`` fixes it.
    """

    start: int
    end: Optional[int] = None
    tokens: List[Any] = field(default_factory=list)

    def lock(self) -> None:
        """Freeze the window: ``end`` becomes ``start`` plus the token count."""
        self.end = len(self.tokens) + self.start


class TokenProxy:
    """Iterator wrapper around a token generator that allows look-ahead.

    Inside a ``release()`` block, ``eat()`` / ``can_advance()`` read tokens
    ahead of the current position and remember them in a ``ReleaseRange``.
    Once the block exits, normal iteration replays the remembered tokens
    before it pulls anything new from the wrapped generator.

    NOTE(review): ``eat()`` always pulls fresh tokens from the wrapped
    generator and never consults earlier, already locked ranges. If a new
    ``release()`` were opened while the counter is still inside a previous
    range, the look-ahead would start past the replay position -- confirm
    that callers never do this.
    """

    def __init__(self, generator: Any) -> None:
        self._tokens = generator
        # Number of tokens handed out so far through ``__next__``.
        self._counter = 0
        self._release_ranges: List[ReleaseRange] = []

    @contextmanager
    def release(self) -> Iterator["TokenProxy"]:
        """Open a look-ahead window at the current position.

        The window is registered immediately and locked on exit (even when
        the body raises), so its ``end`` reflects how far was read ahead.
        """
        window = ReleaseRange(self._counter)
        self._release_ranges.append(window)
        try:
            yield self
        finally:
            window.lock()

    def eat(self, point: int) -> Any:
        """Return the look-ahead token at offset ``point`` in the newest window.

        Tokens are read from the wrapped generator only as far as needed and
        are cached in the window, so asking for the same offset twice is
        free. ``StopIteration`` propagates if the generator runs out first.
        """
        lookahead = self._release_ranges[-1].tokens
        while len(lookahead) <= point:
            lookahead.append(next(self._tokens))
        return lookahead[point]

    def __iter__(self) -> "TokenProxy":
        return self

    def __next__(self) -> Any:
        """Return the next token, replaying look-ahead tokens when available."""
        position = self._counter
        for window in self._release_ranges:
            # Plain iteration is only expected once every window is locked.
            assert window.end is not None

            offset = position - window.start
            if 0 <= offset < window.end - window.start:
                # This position was already read ahead: hand back the cached
                # token instead of touching the generator.
                result = window.tokens[offset]
                break
        else:
            result = next(self._tokens)
        self._counter += 1
        return result

    def can_advance(self, to: int) -> bool:
        """Tell whether a token exists at look-ahead offset ``to``.

        Implemented by attempting ``eat(to)``; since ``eat`` caches what it
        reads, a later ``eat(to)`` does not read the generator again.
        """
        try:
            self.eat(to)
        except StopIteration:
            return False
        return True


class Driver(object):
def __init__(
self,
Expand All @@ -57,14 +130,18 @@ def __init__(
def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
"""Parse a series of tokens and return the syntax tree."""
# XXX Move the prefix computation into a wrapper around tokenize.
proxy = TokenProxy(tokens)

p = parse.Parser(self.grammar, self.convert)
p.setup()
p.setup(proxy=proxy)

lineno = 1
column = 0
indent_columns = []
type = value = start = end = line_text = None
prefix = ""
for quintuple in tokens:

for quintuple in proxy:
type, value, start, end, line_text = quintuple
if start != (lineno, column):
assert (lineno, column) <= start, ((lineno, column), start)
Expand Down
2 changes: 2 additions & 0 deletions src/blib2to3/pgen2/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(self) -> None:
self.dfas: Dict[int, DFAS] = {}
self.labels: List[Label] = [(0, "EMPTY")]
self.keywords: Dict[str, int] = {}
self.soft_keywords: Dict[str, int] = {}
self.tokens: Dict[int, int] = {}
self.symbol2label: Dict[str, int] = {}
self.start = 256
Expand Down Expand Up @@ -136,6 +137,7 @@ def copy(self: _P) -> _P:
"number2symbol",
"dfas",
"keywords",
"soft_keywords",
"tokens",
"symbol2label",
):
Expand Down
Loading

0 comments on commit 3c2ea8a

Please sign in to comment.