diff --git a/ChangeLog b/ChangeLog index 79497856f4..ece1502b93 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,11 @@ Release Date: TBA Close #772 +* Restructure the AST parsing heuristic to always pick the same module + + Close PyCQA/pylint#3540 + Close #773 + What's New in astroid 2.4.0? ============================ diff --git a/astroid/_ast.py b/astroid/_ast.py index 66c5cf258a..34b74c5f23 100644 --- a/astroid/_ast.py +++ b/astroid/_ast.py @@ -4,10 +4,11 @@ from typing import Optional import sys -_ast_py2 = _ast_py3 = None +import astroid + +_ast_py3 = None try: import typed_ast.ast3 as _ast_py3 - import typed_ast.ast27 as _ast_py2 except ImportError: pass @@ -21,28 +22,30 @@ FunctionType = namedtuple("FunctionType", ["argtypes", "returns"]) -def _get_parser_module(parse_python_two=False, type_comments_support=True): - if not type_comments_support: - return ast - - if parse_python_two: - parser_module = _ast_py2 - else: - parser_module = _ast_py3 - return parser_module or ast - - -def _parse(string: str, parse_python_two=False, type_comments=True): - parse_module = _get_parser_module( - parse_python_two=parse_python_two, type_comments_support=type_comments +class ParserModule( + namedtuple( + "ParserModule", + [ + "module", + "unary_op_classes", + "cmp_op_classes", + "bool_op_classes", + "bin_op_classes", + "context_classes", + ], ) - parse_func = parse_module.parse - if parse_module is _ast_py3: - if PY38: - parse_func = partial(parse_func, type_comments=type_comments) - if not parse_python_two: - parse_func = partial(parse_func, feature_version=sys.version_info.minor) - return parse_func(string) +): + def parse(self, string: str, type_comments=True): + if self.module is _ast_py3: + if PY38: + parse_func = partial(self.module.parse, type_comments=type_comments) + else: + parse_func = partial( + self.module.parse, feature_version=sys.version_info.minor + ) + else: + parse_func = self.module.parse + return parse_func(string) def parse_function_type_comment(type_comment: str) -> Optional[FunctionType]: @@ -52,3 +55,77 @@ def parse_function_type_comment(type_comment: str) -> Optional[FunctionType]: func_type = _ast_py3.parse(type_comment, "", "func_type") return FunctionType(argtypes=func_type.argtypes, returns=func_type.returns) + + +def get_parser_module(type_comments=True) -> ParserModule: + if not type_comments: + parser_module = ast + else: + parser_module = _ast_py3 + parser_module = parser_module or ast + + unary_op_classes = _unary_operators_from_module(parser_module) + cmp_op_classes = _compare_operators_from_module(parser_module) + bool_op_classes = _bool_operators_from_module(parser_module) + bin_op_classes = _binary_operators_from_module(parser_module) + context_classes = _contexts_from_module(parser_module) + + return ParserModule( + parser_module, + unary_op_classes, + cmp_op_classes, + bool_op_classes, + bin_op_classes, + context_classes, + ) + + +def _unary_operators_from_module(module): + return {module.UAdd: "+", module.USub: "-", module.Not: "not", module.Invert: "~"} + + +def _binary_operators_from_module(module): + binary_operators = { + module.Add: "+", + module.BitAnd: "&", + module.BitOr: "|", + module.BitXor: "^", + module.Div: "/", + module.FloorDiv: "//", + module.MatMult: "@", + module.Mod: "%", + module.Mult: "*", + module.Pow: "**", + module.Sub: "-", + module.LShift: "<<", + module.RShift: ">>", + } + return binary_operators + + +def _bool_operators_from_module(module): + return {module.And: "and", module.Or: "or"} + + +def _compare_operators_from_module(module): + return { + module.Eq: "==", + module.Gt: ">", + module.GtE: ">=", + module.In: "in", + module.Is: "is", + module.IsNot: "is not", + module.Lt: "<", + module.LtE: "<=", + module.NotEq: "!=", + module.NotIn: "not in", + } + + +def _contexts_from_module(module): + return { + module.Load: astroid.Load, + module.Store: astroid.Store, + module.Del: astroid.Del, + module.Param: astroid.Store, + } diff --git a/astroid/builder.py b/astroid/builder.py index da37f5bd59..9e808f1b54 100644 --- a/astroid/builder.py +++ b/astroid/builder.py @@ -22,7 +22,7 @@ import textwrap from tokenize import detect_encoding -from astroid._ast import _parse +from astroid._ast import get_parser_module from astroid import bases from astroid import exceptions from astroid import manager @@ -42,7 +42,7 @@ # The comment used to select a statement to be extracted # when calling extract_node. _STATEMENT_SELECTOR = "#@" - +MISPLACED_TYPE_ANNOTATION_ERROR = "misplaced type annotation" MANAGER = manager.AstroidManager() @@ -165,7 +165,7 @@ def _post_build(self, module, encoding): def _data_build(self, data, modname, path): """Build tree node from data and add some informations""" try: - node = _parse_string(data) + node, parser_module = _parse_string(data, type_comments=True) except (TypeError, ValueError, SyntaxError) as exc: raise exceptions.AstroidSyntaxError( "Parsing Python code failed:\n{error}", @@ -174,6 +174,7 @@ def _data_build(self, data, modname, path): path=path, error=exc, ) from exc + if path is not None: node_file = os.path.abspath(path) else: @@ -186,7 +187,7 @@ def _data_build(self, data, modname, path): path is not None and os.path.splitext(os.path.basename(path))[0] == "__init__" ) - builder = rebuilder.TreeRebuilder(self._manager) + builder = rebuilder.TreeRebuilder(self._manager, parser_module) module = builder.visit_module(node, modname, node_file, package) module._import_from_nodes = builder._import_from_nodes module._delayed_assattr = builder._delayed_assattr @@ -438,17 +439,17 @@ def _extract(node): return extracted -MISPLACED_TYPE_ANNOTATION_ERROR = "misplaced type annotation" - - def _parse_string(data, type_comments=True): + parser_module = get_parser_module(type_comments=type_comments) try: - node = _parse(data + "\n", type_comments=type_comments) + parsed = parser_module.parse(data + "\n", type_comments=type_comments) except SyntaxError as exc: # If the type annotations are misplaced for some reason, we do not want # to fail the entire parsing of the file, so we need to retry the parsing without # type comment support. if exc.args[0] != MISPLACED_TYPE_ANNOTATION_ERROR or not type_comments: raise - node = _parse(data + "\n", type_comments=False) - return node + + parser_module = get_parser_module(type_comments=False) + parsed = parser_module.parse(data + "\n", type_comments=False) + return parsed, parser_module diff --git a/astroid/rebuilder.py b/astroid/rebuilder.py index 529c7295f9..3fc1a83f2b 100644 --- a/astroid/rebuilder.py +++ b/astroid/rebuilder.py @@ -27,9 +27,10 @@ """ import sys +from typing import Optional import astroid -from astroid._ast import _parse, _get_parser_module, parse_function_type_comment +from astroid._ast import parse_function_type_comment, get_parser_module, ParserModule from astroid import nodes @@ -47,57 +48,6 @@ PY38 = sys.version_info >= (3, 8) -def _binary_operators_from_module(module): - binary_operators = { - module.Add: "+", - module.BitAnd: "&", - module.BitOr: "|", - module.BitXor: "^", - module.Div: "/", - module.FloorDiv: "//", - module.MatMult: "@", - module.Mod: "%", - module.Mult: "*", - module.Pow: "**", - module.Sub: "-", - module.LShift: "<<", - module.RShift: ">>", - } - return binary_operators - - -def _bool_operators_from_module(module): - return {module.And: "and", module.Or: "or"} - - -def _unary_operators_from_module(module): - return {module.UAdd: "+", module.USub: "-", module.Not: "not", module.Invert: "~"} - - -def _compare_operators_from_module(module): - return { - module.Eq: "==", - module.Gt: ">", - module.GtE: ">=", - module.In: "in", - module.Is: "is", - module.IsNot: "is not", - module.Lt: "<", - module.LtE: "<=", - module.NotEq: "!=", - module.NotIn: "not in", - } - - -def _contexts_from_module(module): - return { - module.Load: astroid.Load, - module.Store: astroid.Store, - module.Del: astroid.Del, - module.Param: astroid.Store, - } - - def _visit_or_none(node, attr, visitor, parent, visit="visit", **kws): """If the given node has an attribute, visits the attribute, and otherwise returns None. @@ -113,32 +63,30 @@ def _visit_or_none(node, attr, visitor, parent, visit="visit", **kws): class TreeRebuilder: """Rebuilds the _ast tree to become an Astroid tree""" - def __init__(self, manager, parse_python_two: bool = False): + def __init__(self, manager, parser_module: Optional[ParserModule] = None): self._manager = manager self._global_names = [] self._import_from_nodes = [] self._delayed_assattr = [] self._visit_meths = {} - # Configure the right classes for the right module - self._parser_module = _get_parser_module(parse_python_two=parse_python_two) - self._unary_op_classes = _unary_operators_from_module(self._parser_module) - self._cmp_op_classes = _compare_operators_from_module(self._parser_module) - self._bool_op_classes = _bool_operators_from_module(self._parser_module) - self._bin_op_classes = _binary_operators_from_module(self._parser_module) - self._context_classes = _contexts_from_module(self._parser_module) + if parser_module is None: + self._parser_module = get_parser_module() + else: + self._parser_module = parser_module + self._module = self._parser_module.module def _get_doc(self, node): try: if PY37 and hasattr(node, "docstring"): doc = node.docstring return node, doc - if node.body and isinstance(node.body[0], self._parser_module.Expr): + if node.body and isinstance(node.body[0], self._module.Expr): first_value = node.body[0].value - if isinstance(first_value, self._parser_module.Str) or ( + if isinstance(first_value, self._module.Str) or ( PY38 - and isinstance(first_value, self._parser_module.Constant) + and isinstance(first_value, self._module.Constant) and isinstance(first_value.value, str) ): doc = first_value.value if PY38 else first_value.s @@ -149,7 +97,7 @@ def _get_doc(self, node): return node, None def _get_context(self, node): - return self._context_classes.get(type(node.ctx), astroid.Load) + return self._parser_module.context_classes.get(type(node.ctx), astroid.Load) def visit_module(self, node, modname, modpath, package): """visit a Module node by returning a fresh instance of it""" @@ -279,7 +227,7 @@ def check_type_comment(self, node, parent): return None try: - type_comment_ast = _parse(type_comment) + type_comment_ast = self._parser_module.parse(type_comment) except SyntaxError: # Invalid type comment, just skip it. return None @@ -362,7 +310,7 @@ def visit_assignname(self, node, parent, node_name=None): def visit_augassign(self, node, parent): """visit a AugAssign node by returning a fresh instance of it""" newnode = nodes.AugAssign( - self._bin_op_classes[type(node.op)] + "=", + self._parser_module.bin_op_classes[type(node.op)] + "=", node.lineno, node.col_offset, parent, @@ -381,7 +329,10 @@ def visit_repr(self, node, parent): def visit_binop(self, node, parent): """visit a BinOp node by returning a fresh instance of it""" newnode = nodes.BinOp( - self._bin_op_classes[type(node.op)], node.lineno, node.col_offset, parent + self._parser_module.bin_op_classes[type(node.op)], + node.lineno, + node.col_offset, + parent, ) newnode.postinit( self.visit(node.left, newnode), self.visit(node.right, newnode) @@ -391,7 +342,10 @@ def visit_binop(self, node, parent): def visit_boolop(self, node, parent): """visit a BoolOp node by returning a fresh instance of it""" newnode = nodes.BoolOp( - self._bool_op_classes[type(node.op)], node.lineno, node.col_offset, parent + self._parser_module.bool_op_classes[type(node.op)], + node.lineno, + node.col_offset, + parent, ) newnode.postinit([self.visit(child, newnode) for child in node.values]) return newnode @@ -485,7 +439,10 @@ def visit_compare(self, node, parent): newnode.postinit( self.visit(node.left, newnode), [ - (self._cmp_op_classes[op.__class__], self.visit(expr, newnode)) + ( + self._parser_module.cmp_op_classes[op.__class__], + self.visit(expr, newnode), + ) for (op, expr) in zip(node.ops, node.comparators) ], ) @@ -1002,7 +959,7 @@ def visit_tuple(self, node, parent): def visit_unaryop(self, node, parent): """visit a UnaryOp node by returning a fresh instance of it""" newnode = nodes.UnaryOp( - self._unary_op_classes[node.op.__class__], + self._parser_module.unary_op_classes[node.op.__class__], node.lineno, node.col_offset, parent,