From f3d4139a0375149f09a5672420e9a34347ff4bd8 Mon Sep 17 00:00:00 2001 From: Claudiu Popa Date: Wed, 29 Apr 2020 08:51:27 +0200 Subject: [PATCH] Restructure the AST parsing heuristic to always pick the same module When a file contained a misplaced type annotation, we were retrying the parsing without type comments support. That second parsing was using the builtin ast module, but the rest of the tree utilities (the builder and rebuilder) were not aware of the new parsing module that was used to build the AST nodes a second time. This commit moves the logic of picking the parsing module and the corresponding AST node mapping in a single place, which can be used by both the builder and the rebuilder. Close PyCQA/pylint#3540 Close #773 --- ChangeLog | 5 ++ astroid/_ast.py | 123 +++++++++++++++++++++++++++++++++++-------- astroid/builder.py | 21 ++++---- astroid/rebuilder.py | 97 ++++++++++------------------------ 4 files changed, 143 insertions(+), 103 deletions(-) diff --git a/ChangeLog b/ChangeLog index d85874b6bf..9f428adc87 100644 --- a/ChangeLog +++ b/ChangeLog @@ -16,6 +16,11 @@ Release Date: TBA Close #772 +* Restructure the AST parsing heuristic to always pick the same module + + Close PyCQA/pylint#3540 + Close #773 + What's New in astroid 2.4.0? ============================ diff --git a/astroid/_ast.py b/astroid/_ast.py index 66c5cf258a..34b74c5f23 100644 --- a/astroid/_ast.py +++ b/astroid/_ast.py @@ -4,10 +4,11 @@ from typing import Optional import sys -_ast_py2 = _ast_py3 = None +import astroid + +_ast_py3 = None try: import typed_ast.ast3 as _ast_py3 - import typed_ast.ast27 as _ast_py2 except ImportError: pass @@ -21,28 +22,30 @@ FunctionType = namedtuple("FunctionType", ["argtypes", "returns"]) -def _get_parser_module(parse_python_two=False, type_comments_support=True): - if not type_comments_support: - return ast - - if parse_python_two: - parser_module = _ast_py2 - else: - parser_module = _ast_py3 - return parser_module or ast - - -def _parse(string: str, parse_python_two=False, type_comments=True): - parse_module = _get_parser_module( - parse_python_two=parse_python_two, type_comments_support=type_comments +class ParserModule( + namedtuple( + "ParserModule", + [ + "module", + "unary_op_classes", + "cmp_op_classes", + "bool_op_classes", + "bin_op_classes", + "context_classes", + ], ) - parse_func = parse_module.parse - if parse_module is _ast_py3: - if PY38: - parse_func = partial(parse_func, type_comments=type_comments) - if not parse_python_two: - parse_func = partial(parse_func, feature_version=sys.version_info.minor) - return parse_func(string) +): + def parse(self, string: str, type_comments=True): + if self.module is _ast_py3: + if PY38: + parse_func = partial(self.module.parse, type_comments=type_comments) + else: + parse_func = partial( + self.module.parse, feature_version=sys.version_info.minor + ) + else: + parse_func = self.module.parse + return parse_func(string) def parse_function_type_comment(type_comment: str) -> Optional[FunctionType]: @@ -52,3 +55,77 @@ def parse_function_type_comment(type_comment: str) -> Optional[FunctionType]: func_type = _ast_py3.parse(type_comment, "", "func_type") return FunctionType(argtypes=func_type.argtypes, returns=func_type.returns) + + +def get_parser_module(type_comments=True) -> ParserModule: + if not type_comments: + parser_module = ast + else: + parser_module = _ast_py3 + parser_module = parser_module or ast + + unary_op_classes = _unary_operators_from_module(parser_module) + cmp_op_classes = _compare_operators_from_module(parser_module) + bool_op_classes = _bool_operators_from_module(parser_module) + bin_op_classes = _binary_operators_from_module(parser_module) + context_classes = _contexts_from_module(parser_module) + + return ParserModule( + parser_module, + unary_op_classes, + cmp_op_classes, + bool_op_classes, + bin_op_classes, + context_classes, + ) + + +def _unary_operators_from_module(module): + return {module.UAdd: "+", module.USub: "-", module.Not: "not", module.Invert: "~"} + + +def _binary_operators_from_module(module): + binary_operators = { + module.Add: "+", + module.BitAnd: "&", + module.BitOr: "|", + module.BitXor: "^", + module.Div: "/", + module.FloorDiv: "//", + module.MatMult: "@", + module.Mod: "%", + module.Mult: "*", + module.Pow: "**", + module.Sub: "-", + module.LShift: "<<", + module.RShift: ">>", + } + return binary_operators + + +def _bool_operators_from_module(module): + return {module.And: "and", module.Or: "or"} + + +def _compare_operators_from_module(module): + return { + module.Eq: "==", + module.Gt: ">", + module.GtE: ">=", + module.In: "in", + module.Is: "is", + module.IsNot: "is not", + module.Lt: "<", + module.LtE: "<=", + module.NotEq: "!=", + module.NotIn: "not in", + } + + +def _contexts_from_module(module): + return { + module.Load: astroid.Load, + module.Store: astroid.Store, + module.Del: astroid.Del, + module.Param: astroid.Store, + } diff --git a/astroid/builder.py b/astroid/builder.py index da37f5bd59..9e808f1b54 100644 --- a/astroid/builder.py +++ b/astroid/builder.py @@ -22,7 +22,7 @@ import textwrap from tokenize import detect_encoding -from astroid._ast import _parse +from astroid._ast import get_parser_module from astroid import bases from astroid import exceptions from astroid import manager @@ -42,7 +42,7 @@ # The comment used to select a statement to be extracted # when calling extract_node. _STATEMENT_SELECTOR = "#@" - +MISPLACED_TYPE_ANNOTATION_ERROR = "misplaced type annotation" MANAGER = manager.AstroidManager() @@ -165,7 +165,7 @@ def _post_build(self, module, encoding): def _data_build(self, data, modname, path): """Build tree node from data and add some informations""" try: - node = _parse_string(data) + node, parser_module = _parse_string(data, type_comments=True) except (TypeError, ValueError, SyntaxError) as exc: raise exceptions.AstroidSyntaxError( "Parsing Python code failed:\n{error}", @@ -174,6 +174,7 @@ def _data_build(self, data, modname, path): path=path, error=exc, ) from exc + if path is not None: node_file = os.path.abspath(path) else: @@ -186,7 +187,7 @@ def _data_build(self, data, modname, path): path is not None and os.path.splitext(os.path.basename(path))[0] == "__init__" ) - builder = rebuilder.TreeRebuilder(self._manager) + builder = rebuilder.TreeRebuilder(self._manager, parser_module) module = builder.visit_module(node, modname, node_file, package) module._import_from_nodes = builder._import_from_nodes module._delayed_assattr = builder._delayed_assattr @@ -438,17 +439,17 @@ def _extract(node): return extracted -MISPLACED_TYPE_ANNOTATION_ERROR = "misplaced type annotation" - - def _parse_string(data, type_comments=True): + parser_module = get_parser_module(type_comments=type_comments) try: - node = _parse(data + "\n", type_comments=type_comments) + parsed = parser_module.parse(data + "\n", type_comments=type_comments) except SyntaxError as exc: # If the type annotations are misplaced for some reason, we do not want # to fail the entire parsing of the file, so we need to retry the parsing without # type comment support. if exc.args[0] != MISPLACED_TYPE_ANNOTATION_ERROR or not type_comments: raise - node = _parse(data + "\n", type_comments=False) - return node + + parser_module = get_parser_module(type_comments=False) + parsed = parser_module.parse(data + "\n", type_comments=False) + return parsed, parser_module diff --git a/astroid/rebuilder.py b/astroid/rebuilder.py index 529c7295f9..3fc1a83f2b 100644 --- a/astroid/rebuilder.py +++ b/astroid/rebuilder.py @@ -27,9 +27,10 @@ """ import sys +from typing import Optional import astroid -from astroid._ast import _parse, _get_parser_module, parse_function_type_comment +from astroid._ast import parse_function_type_comment, get_parser_module, ParserModule from astroid import nodes @@ -47,57 +48,6 @@ PY38 = sys.version_info >= (3, 8) -def _binary_operators_from_module(module): - binary_operators = { - module.Add: "+", - module.BitAnd: "&", - module.BitOr: "|", - module.BitXor: "^", - module.Div: "/", - module.FloorDiv: "//", - module.MatMult: "@", - module.Mod: "%", - module.Mult: "*", - module.Pow: "**", - module.Sub: "-", - module.LShift: "<<", - module.RShift: ">>", - } - return binary_operators - - -def _bool_operators_from_module(module): - return {module.And: "and", module.Or: "or"} - - -def _unary_operators_from_module(module): - return {module.UAdd: "+", module.USub: "-", module.Not: "not", module.Invert: "~"} - - -def _compare_operators_from_module(module): - return { - module.Eq: "==", - module.Gt: ">", - module.GtE: ">=", - module.In: "in", - module.Is: "is", - module.IsNot: "is not", - module.Lt: "<", - module.LtE: "<=", - module.NotEq: "!=", - module.NotIn: "not in", - } - - -def _contexts_from_module(module): - return { - module.Load: astroid.Load, - module.Store: astroid.Store, - module.Del: astroid.Del, - module.Param: astroid.Store, - } - - def _visit_or_none(node, attr, visitor, parent, visit="visit", **kws): """If the given node has an attribute, visits the attribute, and otherwise returns None. @@ -113,32 +63,30 @@ def _visit_or_none(node, attr, visitor, parent, visit="visit", **kws): class TreeRebuilder: """Rebuilds the _ast tree to become an Astroid tree""" - def __init__(self, manager, parse_python_two: bool = False): + def __init__(self, manager, parser_module: Optional[ParserModule] = None): self._manager = manager self._global_names = [] self._import_from_nodes = [] self._delayed_assattr = [] self._visit_meths = {} - # Configure the right classes for the right module - self._parser_module = _get_parser_module(parse_python_two=parse_python_two) - self._unary_op_classes = _unary_operators_from_module(self._parser_module) - self._cmp_op_classes = _compare_operators_from_module(self._parser_module) - self._bool_op_classes = _bool_operators_from_module(self._parser_module) - self._bin_op_classes = _binary_operators_from_module(self._parser_module) - self._context_classes = _contexts_from_module(self._parser_module) + if parser_module is None: + self._parser_module = get_parser_module() + else: + self._parser_module = parser_module + self._module = self._parser_module.module def _get_doc(self, node): try: if PY37 and hasattr(node, "docstring"): doc = node.docstring return node, doc - if node.body and isinstance(node.body[0], self._parser_module.Expr): + if node.body and isinstance(node.body[0], self._module.Expr): first_value = node.body[0].value - if isinstance(first_value, self._parser_module.Str) or ( + if isinstance(first_value, self._module.Str) or ( PY38 - and isinstance(first_value, self._parser_module.Constant) + and isinstance(first_value, self._module.Constant) and isinstance(first_value.value, str) ): doc = first_value.value if PY38 else first_value.s @@ -149,7 +97,7 @@ def _get_doc(self, node): return node, None def _get_context(self, node): - return self._context_classes.get(type(node.ctx), astroid.Load) + return self._parser_module.context_classes.get(type(node.ctx), astroid.Load) def visit_module(self, node, modname, modpath, package): """visit a Module node by returning a fresh instance of it""" @@ -279,7 +227,7 @@ def check_type_comment(self, node, parent): return None try: - type_comment_ast = _parse(type_comment) + type_comment_ast = self._parser_module.parse(type_comment) except SyntaxError: # Invalid type comment, just skip it. return None @@ -362,7 +310,7 @@ def visit_assignname(self, node, parent, node_name=None): def visit_augassign(self, node, parent): """visit a AugAssign node by returning a fresh instance of it""" newnode = nodes.AugAssign( - self._bin_op_classes[type(node.op)] + "=", + self._parser_module.bin_op_classes[type(node.op)] + "=", node.lineno, node.col_offset, parent, @@ -381,7 +329,10 @@ def visit_repr(self, node, parent): def visit_binop(self, node, parent): """visit a BinOp node by returning a fresh instance of it""" newnode = nodes.BinOp( - self._bin_op_classes[type(node.op)], node.lineno, node.col_offset, parent + self._parser_module.bin_op_classes[type(node.op)], + node.lineno, + node.col_offset, + parent, ) newnode.postinit( self.visit(node.left, newnode), self.visit(node.right, newnode) @@ -391,7 +342,10 @@ def visit_binop(self, node, parent): def visit_boolop(self, node, parent): """visit a BoolOp node by returning a fresh instance of it""" newnode = nodes.BoolOp( - self._bool_op_classes[type(node.op)], node.lineno, node.col_offset, parent + self._parser_module.bool_op_classes[type(node.op)], + node.lineno, + node.col_offset, + parent, ) newnode.postinit([self.visit(child, newnode) for child in node.values]) return newnode @@ -485,7 +439,10 @@ def visit_compare(self, node, parent): newnode.postinit( self.visit(node.left, newnode), [ - (self._cmp_op_classes[op.__class__], self.visit(expr, newnode)) + ( + self._parser_module.cmp_op_classes[op.__class__], + self.visit(expr, newnode), + ) for (op, expr) in zip(node.ops, node.comparators) ], ) @@ -1002,7 +959,7 @@ def visit_tuple(self, node, parent): def visit_unaryop(self, node, parent): """visit a UnaryOp node by returning a fresh instance of it""" newnode = nodes.UnaryOp( - self._unary_op_classes[node.op.__class__], + self._parser_module.unary_op_classes[node.op.__class__], node.lineno, node.col_offset, parent,