From 3f7d3afb0eea9d98c2af806fd2b25e9082fa2178 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Fri, 1 Jun 2018 21:36:21 -0700 Subject: [PATCH 1/4] Delete parso folder --- pythonFiles/parso/__init__.py | 58 -- pythonFiles/parso/_compatibility.py | 103 -- pythonFiles/parso/cache.py | 162 --- pythonFiles/parso/grammar.py | 249 ----- pythonFiles/parso/normalizer.py | 184 ---- pythonFiles/parso/parser.py | 78 -- pythonFiles/parso/pgen2/__init__.py | 8 - pythonFiles/parso/pgen2/grammar.py | 128 --- pythonFiles/parso/pgen2/parse.py | 223 ----- pythonFiles/parso/pgen2/pgen.py | 400 -------- pythonFiles/parso/python/__init__.py | 0 pythonFiles/parso/python/diff.py | 593 ----------- pythonFiles/parso/python/errors.py | 994 ------------------- pythonFiles/parso/python/grammar26.txt | 159 --- pythonFiles/parso/python/grammar27.txt | 143 --- pythonFiles/parso/python/grammar33.txt | 134 --- pythonFiles/parso/python/grammar34.txt | 134 --- pythonFiles/parso/python/grammar35.txt | 153 --- pythonFiles/parso/python/grammar36.txt | 157 --- pythonFiles/parso/python/grammar37.txt | 157 --- pythonFiles/parso/python/issue_list.txt | 176 ---- pythonFiles/parso/python/parser.py | 265 ----- pythonFiles/parso/python/pep8.py | 727 -------------- pythonFiles/parso/python/prefix.py | 97 -- pythonFiles/parso/python/token.py | 113 --- pythonFiles/parso/python/tokenize.py | 602 ------------ pythonFiles/parso/python/tree.py | 1192 ----------------------- pythonFiles/parso/tree.py | 363 ------- pythonFiles/parso/utils.py | 156 --- 29 files changed, 7908 deletions(-) delete mode 100644 pythonFiles/parso/__init__.py delete mode 100644 pythonFiles/parso/_compatibility.py delete mode 100644 pythonFiles/parso/cache.py delete mode 100644 pythonFiles/parso/grammar.py delete mode 100644 pythonFiles/parso/normalizer.py delete mode 100644 pythonFiles/parso/parser.py delete mode 100644 pythonFiles/parso/pgen2/__init__.py delete mode 100644 pythonFiles/parso/pgen2/grammar.py delete mode 100644 pythonFiles/parso/pgen2/parse.py delete mode 100644 pythonFiles/parso/pgen2/pgen.py delete mode 100644 pythonFiles/parso/python/__init__.py delete mode 100644 pythonFiles/parso/python/diff.py delete mode 100644 pythonFiles/parso/python/errors.py delete mode 100644 pythonFiles/parso/python/grammar26.txt delete mode 100644 pythonFiles/parso/python/grammar27.txt delete mode 100644 pythonFiles/parso/python/grammar33.txt delete mode 100644 pythonFiles/parso/python/grammar34.txt delete mode 100644 pythonFiles/parso/python/grammar35.txt delete mode 100644 pythonFiles/parso/python/grammar36.txt delete mode 100644 pythonFiles/parso/python/grammar37.txt delete mode 100644 pythonFiles/parso/python/issue_list.txt delete mode 100644 pythonFiles/parso/python/parser.py delete mode 100644 pythonFiles/parso/python/pep8.py delete mode 100644 pythonFiles/parso/python/prefix.py delete mode 100644 pythonFiles/parso/python/token.py delete mode 100644 pythonFiles/parso/python/tokenize.py delete mode 100644 pythonFiles/parso/python/tree.py delete mode 100644 pythonFiles/parso/tree.py delete mode 100644 pythonFiles/parso/utils.py diff --git a/pythonFiles/parso/__init__.py b/pythonFiles/parso/__init__.py deleted file mode 100644 index c4cce53ea690..000000000000 --- a/pythonFiles/parso/__init__.py +++ /dev/null @@ -1,58 +0,0 @@ -r""" -Parso is a Python parser that supports error recovery and round-trip parsing -for different Python versions (in multiple Python versions). Parso is also able -to list multiple syntax errors in your python file. 
- -Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful -for other projects as well. - -Parso consists of a small API to parse Python and analyse the syntax tree. - -.. _jedi: https://github.com/davidhalter/jedi - -A simple example: - ->>> import parso ->>> module = parso.parse('hello + 1', version="3.6") ->>> expr = module.children[0] ->>> expr -PythonNode(arith_expr, [, , ]) ->>> print(expr.get_code()) -hello + 1 ->>> name = expr.children[0] ->>> name - ->>> name.end_pos -(1, 5) ->>> expr.end_pos -(1, 9) - -To list multiple issues: - ->>> grammar = parso.load_grammar() ->>> module = grammar.parse('foo +\nbar\ncontinue') ->>> error1, error2 = grammar.iter_errors(module) ->>> error1.message -'SyntaxError: invalid syntax' ->>> error2.message -"SyntaxError: 'continue' not properly in loop" -""" - -from parso.parser import ParserSyntaxError -from parso.grammar import Grammar, load_grammar -from parso.utils import split_lines, python_bytes_to_unicode - - -__version__ = '0.2.0' - - -def parse(code=None, **kwargs): - """ - A utility function to avoid loading grammars. - Params are documented in :py:meth:`parso.Grammar.parse`. - - :param str version: The version used by :py:func:`parso.load_grammar`. - """ - version = kwargs.pop('version', None) - grammar = load_grammar(version=version) - return grammar.parse(code, **kwargs) diff --git a/pythonFiles/parso/_compatibility.py b/pythonFiles/parso/_compatibility.py deleted file mode 100644 index db411eebf981..000000000000 --- a/pythonFiles/parso/_compatibility.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -To ensure compatibility from Python ``2.6`` - ``3.3``, a module has been -created. Clearly there is huge need to use conforming syntax. -""" -import sys -import platform - -# Cannot use sys.version.major and minor names, because in Python 2.6 it's not -# a namedtuple. -py_version = int(str(sys.version_info[0]) + str(sys.version_info[1])) - -# unicode function -try: - unicode = unicode -except NameError: - unicode = str - -is_pypy = platform.python_implementation() == 'PyPy' - - -def use_metaclass(meta, *bases): - """ Create a class with a metaclass. """ - if not bases: - bases = (object,) - return meta("HackClass", bases, {}) - - -try: - encoding = sys.stdout.encoding - if encoding is None: - encoding = 'utf-8' -except AttributeError: - encoding = 'ascii' - - -def u(string): - """Cast to unicode DAMMIT! - Written because Python2 repr always implicitly casts to a string, so we - have to cast back to a unicode (and we know that we always deal with valid - unicode, because we check that in the beginning). - """ - if py_version >= 30: - return str(string) - - if not isinstance(string, unicode): - return unicode(str(string), 'UTF-8') - return string - - -try: - FileNotFoundError = FileNotFoundError -except NameError: - FileNotFoundError = IOError - - -def utf8_repr(func): - """ - ``__repr__`` methods in Python 2 don't allow unicode objects to be - returned. Therefore cast them to utf-8 bytes in this decorator. 
- """ - def wrapper(self): - result = func(self) - if isinstance(result, unicode): - return result.encode('utf-8') - else: - return result - - if py_version >= 30: - return func - else: - return wrapper - - -try: - from functools import total_ordering -except ImportError: - # Python 2.6 - def total_ordering(cls): - """Class decorator that fills in missing ordering methods""" - convert = { - '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)), - ('__le__', lambda self, other: self < other or self == other), - ('__ge__', lambda self, other: not self < other)], - '__le__': [('__ge__', lambda self, other: not self <= other or self == other), - ('__lt__', lambda self, other: self <= other and not self == other), - ('__gt__', lambda self, other: not self <= other)], - '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)), - ('__ge__', lambda self, other: self > other or self == other), - ('__le__', lambda self, other: not self > other)], - '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other), - ('__gt__', lambda self, other: self >= other and not self == other), - ('__lt__', lambda self, other: not self >= other)] - } - roots = set(dir(cls)) & set(convert) - if not roots: - raise ValueError('must define at least one ordering operation: < > <= >=') - root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__ - for opname, opfunc in convert[root]: - if opname not in roots: - opfunc.__name__ = opname - opfunc.__doc__ = getattr(int, opname).__doc__ - setattr(cls, opname, opfunc) - return cls diff --git a/pythonFiles/parso/cache.py b/pythonFiles/parso/cache.py deleted file mode 100644 index d0465d023086..000000000000 --- a/pythonFiles/parso/cache.py +++ /dev/null @@ -1,162 +0,0 @@ -import time -import os -import sys -import hashlib -import gc -import shutil -import platform -import errno -import logging - -try: - import cPickle as pickle -except: - import pickle - -from parso._compatibility import FileNotFoundError - -LOG = logging.getLogger(__name__) - - -_PICKLE_VERSION = 30 -""" -Version number (integer) for file system cache. - -Increment this number when there are any incompatible changes in -the parser tree classes. For example, the following changes -are regarded as incompatible. - -- A class name is changed. -- A class is moved to another module. -- A __slot__ of a class is changed. -""" - -_VERSION_TAG = '%s-%s%s-%s' % ( - platform.python_implementation(), - sys.version_info[0], - sys.version_info[1], - _PICKLE_VERSION -) -""" -Short name for distinguish Python implementations and versions. - -It's like `sys.implementation.cache_tag` but for Python < 3.3 -we generate something similar. See: -http://docs.python.org/3/library/sys.html#sys.implementation -""" - -def _get_default_cache_path(): - if platform.system().lower() == 'windows': - dir_ = os.path.join(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso') - elif platform.system().lower() == 'darwin': - dir_ = os.path.join('~', 'Library', 'Caches', 'Parso') - else: - dir_ = os.path.join(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso') - return os.path.expanduser(dir_) - -_default_cache_path = _get_default_cache_path() -""" -The path where the cache is stored. - -On Linux, this defaults to ``~/.cache/parso/``, on OS X to -``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``. -On Linux, if environment variable ``$XDG_CACHE_HOME`` is set, -``$XDG_CACHE_HOME/parso`` is used instead of the default one. 
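To make the cache behaviour described above concrete, here is a minimal usage sketch (the file name ``example.py`` and the version string are placeholders; ``clear_cache`` removes both the on-disk pickles and the in-memory ``parser_cache``):

import parso
from parso.cache import clear_cache

grammar = parso.load_grammar(version='3.6')
# First parse: the tree is pickled below the version-tagged cache directory.
module = grammar.parse(path='example.py', cache=True)
# Second parse: served from cache as long as example.py has not been modified.
module = grammar.parse(path='example.py', cache=True)
# Wipe both the on-disk and the in-memory cache again.
clear_cache()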
-""" - -parser_cache = {} - - -class _NodeCacheItem(object): - def __init__(self, node, lines, change_time=None): - self.node = node - self.lines = lines - if change_time is None: - change_time = time.time() - self.change_time = change_time - - -def load_module(hashed_grammar, path, cache_path=None): - """ - Returns a module or None, if it fails. - """ - try: - p_time = os.path.getmtime(path) - except FileNotFoundError: - return None - - try: - module_cache_item = parser_cache[hashed_grammar][path] - if p_time <= module_cache_item.change_time: - return module_cache_item.node - except KeyError: - return _load_from_file_system(hashed_grammar, path, p_time, cache_path=cache_path) - - -def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None): - cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path) - try: - try: - if p_time > os.path.getmtime(cache_path): - # Cache is outdated - return None - except OSError as e: - if e.errno == errno.ENOENT: - # In Python 2 instead of an IOError here we get an OSError. - raise FileNotFoundError - else: - raise - - with open(cache_path, 'rb') as f: - gc.disable() - try: - module_cache_item = pickle.load(f) - finally: - gc.enable() - except FileNotFoundError: - return None - else: - parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item - LOG.debug('pickle loaded: %s', path) - return module_cache_item.node - - -def save_module(hashed_grammar, path, module, lines, pickling=True, cache_path=None): - try: - p_time = None if path is None else os.path.getmtime(path) - except OSError: - p_time = None - pickling = False - - item = _NodeCacheItem(module, lines, p_time) - parser_cache.setdefault(hashed_grammar, {})[path] = item - if pickling and path is not None: - _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path) - - -def _save_to_file_system(hashed_grammar, path, item, cache_path=None): - with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f: - pickle.dump(item, f, pickle.HIGHEST_PROTOCOL) - - -def clear_cache(cache_path=None): - if cache_path is None: - cache_path = _default_cache_path - shutil.rmtree(cache_path) - parser_cache.clear() - - -def _get_hashed_path(hashed_grammar, path, cache_path=None): - directory = _get_cache_directory_path(cache_path=cache_path) - - file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest() - return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash)) - - -def _get_cache_directory_path(cache_path=None): - if cache_path is None: - cache_path = _default_cache_path - directory = os.path.join(cache_path, _VERSION_TAG) - if not os.path.exists(directory): - os.makedirs(directory) - return directory diff --git a/pythonFiles/parso/grammar.py b/pythonFiles/parso/grammar.py deleted file mode 100644 index c825b5554c0e..000000000000 --- a/pythonFiles/parso/grammar.py +++ /dev/null @@ -1,249 +0,0 @@ -import hashlib -import os - -from parso._compatibility import FileNotFoundError, is_pypy -from parso.pgen2.pgen import generate_grammar -from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string -from parso.python.diff import DiffParser -from parso.python.tokenize import tokenize_lines, tokenize -from parso.python import token -from parso.cache import parser_cache, load_module, save_module -from parso.parser import BaseParser -from parso.python.parser import Parser as PythonParser -from parso.python.errors import ErrorFinderConfig -from parso.python import pep8 - -_loaded_grammars = {} - - -class 
Grammar(object): - """ - :py:func:`parso.load_grammar` returns instances of this class. - - Creating custom grammars by calling this is not supported, yet. - """ - #:param text: A BNF representation of your grammar. - _error_normalizer_config = None - _token_namespace = None - _default_normalizer_config = pep8.PEP8NormalizerConfig() - - def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None): - self._pgen_grammar = generate_grammar( - text, - token_namespace=self._get_token_namespace() - ) - self._parser = parser - self._tokenizer = tokenizer - self._diff_parser = diff_parser - self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest() - - def parse(self, code=None, **kwargs): - """ - If you want to parse a Python file you want to start here, most likely. - - If you need finer grained control over the parsed instance, there will be - other ways to access it. - - :param str code: A unicode or bytes string. When it's not possible to - decode bytes to a string, returns a - :py:class:`UnicodeDecodeError`. - :param bool error_recovery: If enabled, any code will be returned. If - it is invalid, it will be returned as an error node. If disabled, - you will get a ParseError when encountering syntax errors in your - code. - :param str start_symbol: The grammar symbol that you want to parse. Only - allowed to be used when error_recovery is False. - :param str path: The path to the file you want to open. Only needed for caching. - :param bool cache: Keeps a copy of the parser tree in RAM and on disk - if a path is given. Returns the cached trees if the corresponding - files on disk have not changed. - :param bool diff_cache: Diffs the cached python module against the new - code and tries to parse only the parts that have changed. Returns - the same (changed) module that is found in cache. Using this option - requires you to not do anything anymore with the cached modules - under that path, because the contents of it might change. This - option is still somewhat experimental. If you want stability, - please don't use it. - :param bool cache_path: If given saves the parso cache in this - directory. If not given, defaults to the default cache places on - each platform. - - :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a - :py:class:`parso.python.tree.Module`. - """ - if 'start_pos' in kwargs: - raise TypeError("parse() got an unexpected keyword argument.") - return self._parse(code=code, **kwargs) - - def _parse(self, code=None, error_recovery=True, path=None, - start_symbol=None, cache=False, diff_cache=False, - cache_path=None, start_pos=(1, 0)): - """ - Wanted python3.5 * operator and keyword only arguments. Therefore just - wrap it all. - start_pos here is just a parameter internally used. Might be public - sometime in the future. 
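A short sketch of the two ``error_recovery`` modes documented above (the broken snippet is arbitrary; with recovery enabled the bad region comes back as an error node, with it disabled a ``ParserSyntaxError`` is raised):

import parso
from parso.parser import ParserSyntaxError

grammar = parso.load_grammar(version='3.6')

# Default: error recovery is on, so a tree is always returned and the
# offending region shows up as an error node inside it.
module = grammar.parse('def f(:\n    pass\n')
print([child.type for child in module.children])

try:
    grammar.parse('def f(:\n    pass\n', error_recovery=False)
except ParserSyntaxError as e:
    print(e.message)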
- """ - if code is None and path is None: - raise TypeError("Please provide either code or a path.") - - if start_symbol is None: - start_symbol = self._start_symbol - - if error_recovery and start_symbol != 'file_input': - raise NotImplementedError("This is currently not implemented.") - - if cache and path is not None: - module_node = load_module(self._hashed, path, cache_path=cache_path) - if module_node is not None: - return module_node - - if code is None: - with open(path, 'rb') as f: - code = f.read() - - code = python_bytes_to_unicode(code) - - lines = split_lines(code, keepends=True) - if diff_cache: - if self._diff_parser is None: - raise TypeError("You have to define a diff parser to be able " - "to use this option.") - try: - module_cache_item = parser_cache[self._hashed][path] - except KeyError: - pass - else: - module_node = module_cache_item.node - old_lines = module_cache_item.lines - if old_lines == lines: - return module_node - - new_node = self._diff_parser( - self._pgen_grammar, self._tokenizer, module_node - ).update( - old_lines=old_lines, - new_lines=lines - ) - save_module(self._hashed, path, new_node, lines, - # Never pickle in pypy, it's slow as hell. - pickling=cache and not is_pypy, - cache_path=cache_path) - return new_node - - tokens = self._tokenizer(lines, start_pos) - - p = self._parser( - self._pgen_grammar, - error_recovery=error_recovery, - start_symbol=start_symbol - ) - root_node = p.parse(tokens=tokens) - - if cache or diff_cache: - save_module(self._hashed, path, root_node, lines, - # Never pickle in pypy, it's slow as hell. - pickling=cache and not is_pypy, - cache_path=cache_path) - return root_node - - def _get_token_namespace(self): - ns = self._token_namespace - if ns is None: - raise ValueError("The token namespace should be set.") - return ns - - def iter_errors(self, node): - """ - Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of - :py:class:`parso.normalizer.Issue` objects. For Python this is - a list of syntax/indentation errors. - """ - if self._error_normalizer_config is None: - raise ValueError("No error normalizer specified for this grammar.") - - return self._get_normalizer_issues(node, self._error_normalizer_config) - - def _get_normalizer(self, normalizer_config): - if normalizer_config is None: - normalizer_config = self._default_normalizer_config - if normalizer_config is None: - raise ValueError("You need to specify a normalizer, because " - "there's no default normalizer for this tree.") - return normalizer_config.create_normalizer(self) - - def _normalize(self, node, normalizer_config=None): - """ - TODO this is not public, yet. - The returned code will be normalized, e.g. PEP8 for Python. - """ - normalizer = self._get_normalizer(normalizer_config) - return normalizer.walk(node) - - def _get_normalizer_issues(self, node, normalizer_config=None): - normalizer = self._get_normalizer(normalizer_config) - normalizer.walk(node) - return normalizer.issues - - def __repr__(self): - labels = self._pgen_grammar.number2symbol.values() - txt = ' '.join(list(labels)[:3]) + ' ...' 
- return '<%s:%s>' % (self.__class__.__name__, txt) - - -class PythonGrammar(Grammar): - _error_normalizer_config = ErrorFinderConfig() - _token_namespace = token - _start_symbol = 'file_input' - - def __init__(self, version_info, bnf_text): - super(PythonGrammar, self).__init__( - bnf_text, - tokenizer=self._tokenize_lines, - parser=PythonParser, - diff_parser=DiffParser - ) - self.version_info = version_info - - def _tokenize_lines(self, lines, start_pos): - return tokenize_lines(lines, self.version_info, start_pos=start_pos) - - def _tokenize(self, code): - # Used by Jedi. - return tokenize(code, self.version_info) - - -def load_grammar(**kwargs): - """ - Loads a :py:class:`parso.Grammar`. The default version is the current Python - version. - - :param str version: A python version string, e.g. ``version='3.3'``. - """ - def load_grammar(language='python', version=None): - if language == 'python': - version_info = parse_version_string(version) - - file = os.path.join( - 'python', - 'grammar%s%s.txt' % (version_info.major, version_info.minor) - ) - - global _loaded_grammars - path = os.path.join(os.path.dirname(__file__), file) - try: - return _loaded_grammars[path] - except KeyError: - try: - with open(path) as f: - bnf_text = f.read() - - grammar = PythonGrammar(version_info, bnf_text) - return _loaded_grammars.setdefault(path, grammar) - except FileNotFoundError: - message = "Python version %s is currently not supported." % version - raise NotImplementedError(message) - else: - raise NotImplementedError("No support for language %s." % language) - - return load_grammar(**kwargs) diff --git a/pythonFiles/parso/normalizer.py b/pythonFiles/parso/normalizer.py deleted file mode 100644 index 9a3e82e24c87..000000000000 --- a/pythonFiles/parso/normalizer.py +++ /dev/null @@ -1,184 +0,0 @@ -from contextlib import contextmanager - -from parso._compatibility import use_metaclass - - -class _NormalizerMeta(type): - def __new__(cls, name, bases, dct): - new_cls = type.__new__(cls, name, bases, dct) - new_cls.rule_value_classes = {} - new_cls.rule_type_classes = {} - return new_cls - - -class Normalizer(use_metaclass(_NormalizerMeta)): - def __init__(self, grammar, config): - self.grammar = grammar - self._config = config - self.issues = [] - - self._rule_type_instances = self._instantiate_rules('rule_type_classes') - self._rule_value_instances = self._instantiate_rules('rule_value_classes') - - def _instantiate_rules(self, attr): - dct = {} - for base in type(self).mro(): - rules_map = getattr(base, attr, {}) - for type_, rule_classes in rules_map.items(): - new = [rule_cls(self) for rule_cls in rule_classes] - dct.setdefault(type_, []).extend(new) - return dct - - def walk(self, node): - self.initialize(node) - value = self.visit(node) - self.finalize() - return value - - def visit(self, node): - try: - children = node.children - except AttributeError: - return self.visit_leaf(node) - else: - with self.visit_node(node): - return ''.join(self.visit(child) for child in children) - - @contextmanager - def visit_node(self, node): - self._check_type_rules(node) - yield - - def _check_type_rules(self, node): - for rule in self._rule_type_instances.get(node.type, []): - rule.feed_node(node) - - def visit_leaf(self, leaf): - self._check_type_rules(leaf) - - for rule in self._rule_value_instances.get(leaf.value, []): - rule.feed_node(leaf) - - return leaf.prefix + leaf.value - - def initialize(self, node): - pass - - def finalize(self): - pass - - def add_issue(self, node, code, message): - issue = 
Issue(node, code, message) - if issue not in self.issues: - self.issues.append(issue) - return True - - @classmethod - def register_rule(cls, **kwargs): - """ - Use it as a class decorator:: - - normalizer = Normalizer('grammar', 'config') - @normalizer.register_rule(value='foo') - class MyRule(Rule): - error_code = 42 - """ - return cls._register_rule(**kwargs) - - @classmethod - def _register_rule(cls, value=None, values=(), type=None, types=()): - values = list(values) - types = list(types) - if value is not None: - values.append(value) - if type is not None: - types.append(type) - - if not values and not types: - raise ValueError("You must register at least something.") - - def decorator(rule_cls): - for v in values: - cls.rule_value_classes.setdefault(v, []).append(rule_cls) - for t in types: - cls.rule_type_classes.setdefault(t, []).append(rule_cls) - return rule_cls - - return decorator - - -class NormalizerConfig(object): - normalizer_class = Normalizer - - def create_normalizer(self, grammar): - if self.normalizer_class is None: - return None - - return self.normalizer_class(grammar, self) - - -class Issue(object): - def __init__(self, node, code, message): - self._node = node - self.code = code - """ - An integer code that stands for the type of error. - """ - self.message = message - """ - A message (string) for the issue. - """ - self.start_pos = node.start_pos - """ - The start position position of the error as a tuple (line, column). As - always in |parso| the first line is 1 and the first column 0. - """ - - def __eq__(self, other): - return self.start_pos == other.start_pos and self.code == other.code - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - return hash((self.code, self.start_pos)) - - def __repr__(self): - return '<%s: %s>' % (self.__class__.__name__, self.code) - - - -class Rule(object): - code = None - message = None - - def __init__(self, normalizer): - self._normalizer = normalizer - - def is_issue(self, node): - raise NotImplementedError() - - def get_node(self, node): - return node - - def _get_message(self, message): - if message is None: - message = self.message - if message is None: - raise ValueError("The message on the class is not set.") - return message - - def add_issue(self, node, code=None, message=None): - if code is None: - code = self.code - if code is None: - raise ValueError("The error code on the class is not set.") - - message = self._get_message(message) - - self._normalizer.add_issue(node, code, message) - - def feed_node(self, node): - if self.is_issue(node): - issue_node = self.get_node(node) - self.add_issue(issue_node) diff --git a/pythonFiles/parso/parser.py b/pythonFiles/parso/parser.py deleted file mode 100644 index 555ebc712f73..000000000000 --- a/pythonFiles/parso/parser.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -The ``Parser`` tries to convert the available Python code in an easy to read -format, something like an abstract syntax tree. The classes who represent this -tree, are sitting in the :mod:`parso.tree` module. - -The Python module ``tokenize`` is a very important part in the ``Parser``, -because it splits the code into different words (tokens). Sometimes it looks a -bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast`` -module for this? Well, ``ast`` does a very good job understanding proper Python -code, but fails to work as soon as there's a single line of broken code. 
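A minimal comparison, reusing the broken snippet from the package docstring, illustrates the point: ``ast`` raises immediately, while parso still returns a module that round-trips the original text.

import ast
import parso

source = 'foo +\nbar\ncontinue'

try:
    ast.parse(source)
except SyntaxError as e:
    print('ast gives up:', e.msg)

module = parso.parse(source, version='3.6')
print(module.get_code() == source)   # round-trip parsing keeps the text intact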
- -There's one important optimization that needs to be known: Statements are not -being parsed completely. ``Statement`` is just a representation of the tokens -within the statement. This lowers memory usage and cpu time and reduces the -complexity of the ``Parser`` (there's another parser sitting inside -``Statement``, which produces ``Array`` and ``Call``). -""" -from parso import tree -from parso.pgen2.parse import PgenParser - - -class ParserSyntaxError(Exception): - """ - Contains error information about the parser tree. - - May be raised as an exception. - """ - def __init__(self, message, error_leaf): - self.message = message - self.error_leaf = error_leaf - - -class BaseParser(object): - node_map = {} - default_node = tree.Node - - leaf_map = { - } - default_leaf = tree.Leaf - - def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False): - self._pgen_grammar = pgen_grammar - self._start_symbol = start_symbol - self._error_recovery = error_recovery - - def parse(self, tokens): - start_number = self._pgen_grammar.symbol2number[self._start_symbol] - self.pgen_parser = PgenParser( - self._pgen_grammar, self.convert_node, self.convert_leaf, - self.error_recovery, start_number - ) - - node = self.pgen_parser.parse(tokens) - # The stack is empty now, we don't need it anymore. - del self.pgen_parser - return node - - def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix, - add_token_callback): - if self._error_recovery: - raise NotImplementedError("Error Recovery is not implemented") - else: - error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix) - raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf) - - def convert_node(self, pgen_grammar, type_, children): - # TODO REMOVE symbol, we don't want type here. - symbol = pgen_grammar.number2symbol[type_] - try: - return self.node_map[symbol](children) - except KeyError: - return self.default_node(symbol, children) - - def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos): - try: - return self.leaf_map[type_](value, start_pos, prefix) - except KeyError: - return self.default_leaf(value, start_pos, prefix) diff --git a/pythonFiles/parso/pgen2/__init__.py b/pythonFiles/parso/pgen2/__init__.py deleted file mode 100644 index 1ddae5fea9f7..000000000000 --- a/pythonFiles/parso/pgen2/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. -# Copyright 2014 David Halter. Integration into Jedi. -# Modifications are dual-licensed: MIT and PSF. diff --git a/pythonFiles/parso/pgen2/grammar.py b/pythonFiles/parso/pgen2/grammar.py deleted file mode 100644 index e5f211426fad..000000000000 --- a/pythonFiles/parso/pgen2/grammar.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2014 David Halter. Integration into Jedi. -# Modifications are dual-licensed: MIT and PSF. - -"""This module defines the data structures used to represent a grammar. - -These are a bit arcane because they are derived from the data -structures used by Python's 'pgen' parser generator. 
- -There's also a table here mapping operators to their names in the -token module; the Python tokenize module reports all operators as the -fallback token code OP, but the parser needs the actual token code. - -""" - -try: - import cPickle as pickle -except: - import pickle - - -class Grammar(object): - """Pgen parsing tables conversion class. - - Once initialized, this class supplies the grammar tables for the - parsing engine implemented by parse.py. The parsing engine - accesses the instance variables directly. The class here does not - provide initialization of the tables; several subclasses exist to - do this (see the conv and pgen modules). - - The load() method reads the tables from a pickle file, which is - much faster than the other ways offered by subclasses. The pickle - file is written by calling dump() (after loading the grammar - tables using a subclass). The report() method prints a readable - representation of the tables to stdout, for debugging. - - The instance variables are as follows: - - symbol2number -- a dict mapping symbol names to numbers. Symbol - numbers are always 256 or higher, to distinguish - them from token numbers, which are between 0 and - 255 (inclusive). - - number2symbol -- a dict mapping numbers to symbol names; - these two are each other's inverse. - - states -- a list of DFAs, where each DFA is a list of - states, each state is a list of arcs, and each - arc is a (i, j) pair where i is a label and j is - a state number. The DFA number is the index into - this list. (This name is slightly confusing.) - Final states are represented by a special arc of - the form (0, j) where j is its own state number. - - dfas -- a dict mapping symbol numbers to (DFA, first) - pairs, where DFA is an item from the states list - above, and first is a set of tokens that can - begin this grammar rule (represented by a dict - whose values are always 1). - - labels -- a list of (x, y) pairs where x is either a token - number or a symbol number, and y is either None - or a string; the strings are keywords. The label - number is the index in this list; label numbers - are used to mark state transitions (arcs) in the - DFAs. - - start -- the number of the grammar's start symbol. - - keywords -- a dict mapping keyword strings to arc labels. - - tokens -- a dict mapping token numbers to arc labels. - - """ - - def __init__(self, bnf_text): - self.symbol2number = {} - self.number2symbol = {} - self.states = [] - self.dfas = {} - self.labels = [(0, "EMPTY")] - self.keywords = {} - self.tokens = {} - self.symbol2label = {} - self.label2symbol = {} - self.start = 256 - - def dump(self, filename): - """Dump the grammar tables to a pickle file.""" - with open(filename, "wb") as f: - pickle.dump(self.__dict__, f, 2) - - def load(self, filename): - """Load the grammar tables from a pickle file.""" - with open(filename, "rb") as f: - d = pickle.load(f) - self.__dict__.update(d) - - def copy(self): - """ - Copy the grammar. 
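As an illustration of the tables listed above, they can be inspected through the private (non-API) ``_pgen_grammar`` attribute of a loaded parso grammar; a debugging sketch only:

import parso

grammar = parso.load_grammar(version='3.6')
pgen_grammar = grammar._pgen_grammar             # private attribute, used here purely for inspection
print(pgen_grammar.symbol2number['file_input'])  # non-terminals are numbered from 256 upwards
print(sorted(pgen_grammar.keywords)[:5])         # keyword strings mapped to arc labels
pgen_grammar.report()                            # dump all tables to stdout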
- """ - new = self.__class__() - for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", - "tokens", "symbol2label"): - setattr(new, dict_attr, getattr(self, dict_attr).copy()) - new.labels = self.labels[:] - new.states = self.states[:] - new.start = self.start - return new - - def report(self): - """Dump the grammar tables to standard output, for debugging.""" - from pprint import pprint - print("s2n") - pprint(self.symbol2number) - print("n2s") - pprint(self.number2symbol) - print("states") - pprint(self.states) - print("dfas") - pprint(self.dfas) - print("labels") - pprint(self.labels) - print("start", self.start) diff --git a/pythonFiles/parso/pgen2/parse.py b/pythonFiles/parso/pgen2/parse.py deleted file mode 100644 index aaacfcebe44e..000000000000 --- a/pythonFiles/parso/pgen2/parse.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2014 David Halter. Integration into Jedi. -# Modifications are dual-licensed: MIT and PSF. - -""" -Parser engine for the grammar tables generated by pgen. - -The grammar table must be loaded first. - -See Parser/parser.c in the Python distribution for additional info on -how this parsing engine works. -""" - -from parso.python import tokenize - - -class InternalParseError(Exception): - """ - Exception to signal the parser is stuck and error recovery didn't help. - Basically this shouldn't happen. It's a sign that something is really - wrong. - """ - - def __init__(self, msg, type, value, start_pos): - Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % - (msg, tokenize.tok_name[type], value, start_pos)) - self.msg = msg - self.type = type - self.value = value - self.start_pos = start_pos - - -class Stack(list): - def get_tos_nodes(self): - tos = self[-1] - return tos[2][1] - - -def token_to_ilabel(grammar, type_, value): - # Map from token to label - if type_ == tokenize.NAME: - # Check for reserved words (keywords) - try: - return grammar.keywords[value] - except KeyError: - pass - - try: - return grammar.tokens[type_] - except KeyError: - return None - - -class PgenParser(object): - """Parser engine. - - The proper usage sequence is: - - p = Parser(grammar, [converter]) # create instance - p.setup([start]) # prepare for parsing - : - if p.add_token(...): # parse a token - break - root = p.rootnode # root of abstract syntax tree - - A Parser instance may be reused by calling setup() repeatedly. - - A Parser instance contains state pertaining to the current token - sequence, and should not be used concurrently by different threads - to parse separate token sequences. - - See driver.py for how to get input tokens by tokenizing a file or - string. - - Parsing is complete when add_token() returns True; the root of the - abstract syntax tree can then be retrieved from the rootnode - instance variable. When a syntax error occurs, error_recovery() - is called. There is no error recovery; the parser cannot be used - after a syntax error was reported (but it can be reinitialized by - calling setup()). - - """ - - def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start): - """Constructor. - - The grammar argument is a grammar.Grammar instance; see the - grammar module for more information. - - The parser is not ready yet for parsing; you must call the - setup() method to get it started. 
- - The optional convert argument is a function mapping concrete - syntax tree nodes to abstract syntax tree nodes. If not - given, no conversion is done and the syntax tree produced is - the concrete syntax tree. If given, it must be a function of - two arguments, the first being the grammar (a grammar.Grammar - instance), and the second being the concrete syntax tree node - to be converted. The syntax tree is converted from the bottom - up. - - A concrete syntax tree node is a (type, nodes) tuple, where - type is the node type (a token or symbol number) and nodes - is a list of children for symbols, and None for tokens. - - An abstract syntax tree node may be anything; this is entirely - up to the converter function. - - """ - self.grammar = grammar - self.convert_node = convert_node - self.convert_leaf = convert_leaf - - # Each stack entry is a tuple: (dfa, state, node). - # A node is a tuple: (type, children), - # where children is a list of nodes or None - newnode = (start, []) - stackentry = (self.grammar.dfas[start], 0, newnode) - self.stack = Stack([stackentry]) - self.rootnode = None - self.error_recovery = error_recovery - - def parse(self, tokens): - for type_, value, start_pos, prefix in tokens: - if self.add_token(type_, value, start_pos, prefix): - break - else: - # We never broke out -- EOF is too soon -- Unfinished statement. - # However, the error recovery might have added the token again, if - # the stack is empty, we're fine. - if self.stack: - raise InternalParseError("incomplete input", type_, value, start_pos) - return self.rootnode - - def add_token(self, type_, value, start_pos, prefix): - """Add a token; return True if this is the end of the program.""" - ilabel = token_to_ilabel(self.grammar, type_, value) - - # Loop until the token is shifted; may raise exceptions - _gram = self.grammar - _labels = _gram.labels - _push = self._push - _pop = self._pop - _shift = self._shift - while True: - dfa, state, node = self.stack[-1] - states, first = dfa - arcs = states[state] - # Look for a state with this label - for i, newstate in arcs: - t, v = _labels[i] - if ilabel == i: - # Look it up in the list of labels - assert t < 256 - # Shift a token; we're done with it - _shift(type_, value, newstate, prefix, start_pos) - # Pop while we are in an accept-only state - state = newstate - while states[state] == [(0, state)]: - _pop() - if not self.stack: - # Done parsing! - return True - dfa, state, node = self.stack[-1] - states, first = dfa - # Done with this token - return False - elif t >= 256: - # See if it's a symbol and if we're in its first set - itsdfa = _gram.dfas[t] - itsstates, itsfirst = itsdfa - if ilabel in itsfirst: - # Push a symbol - _push(t, itsdfa, newstate) - break # To continue the outer while loop - else: - if (0, state) in arcs: - # An accepting state, pop it and try something else - _pop() - if not self.stack: - # Done parsing, but another token is input - raise InternalParseError("too much input", type_, value, start_pos) - else: - self.error_recovery(self.grammar, self.stack, arcs, type_, - value, start_pos, prefix, self.add_token) - break - - def _shift(self, type_, value, newstate, prefix, start_pos): - """Shift a token. (Internal)""" - dfa, state, node = self.stack[-1] - newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) - node[-1].append(newnode) - self.stack[-1] = (dfa, newstate, node) - - def _push(self, type_, newdfa, newstate): - """Push a nonterminal. 
(Internal)""" - dfa, state, node = self.stack[-1] - newnode = (type_, []) - self.stack[-1] = (dfa, newstate, node) - self.stack.append((newdfa, 0, newnode)) - - def _pop(self): - """Pop a nonterminal. (Internal)""" - popdfa, popstate, (type_, children) = self.stack.pop() - # If there's exactly one child, return that child instead of creating a - # new node. We still create expr_stmt and file_input though, because a - # lot of Jedi depends on its logic. - if len(children) == 1: - newnode = children[0] - else: - newnode = self.convert_node(self.grammar, type_, children) - - try: - # Equal to: - # dfa, state, node = self.stack[-1] - # symbol, children = node - self.stack[-1][2][1].append(newnode) - except IndexError: - # Stack is empty, set the rootnode. - self.rootnode = newnode diff --git a/pythonFiles/parso/pgen2/pgen.py b/pythonFiles/parso/pgen2/pgen.py deleted file mode 100644 index a3e39fa5fe74..000000000000 --- a/pythonFiles/parso/pgen2/pgen.py +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2014 David Halter. Integration into Jedi. -# Modifications are dual-licensed: MIT and PSF. - -from parso.pgen2 import grammar -from parso.python import token -from parso.python import tokenize -from parso.utils import parse_version_string - - -class ParserGenerator(object): - def __init__(self, bnf_text, token_namespace): - self._bnf_text = bnf_text - self.generator = tokenize.tokenize( - bnf_text, - version_info=parse_version_string('3.6') - ) - self._gettoken() # Initialize lookahead - self.dfas, self.startsymbol = self._parse() - self.first = {} # map from symbol name to set of tokens - self._addfirstsets() - self._token_namespace = token_namespace - - def make_grammar(self): - c = grammar.Grammar(self._bnf_text) - names = list(self.dfas.keys()) - names.sort() - # TODO do we still need this? - names.remove(self.startsymbol) - names.insert(0, self.startsymbol) - for name in names: - i = 256 + len(c.symbol2number) - c.symbol2number[name] = i - c.number2symbol[i] = name - for name in names: - dfa = self.dfas[name] - states = [] - for state in dfa: - arcs = [] - for label, next in state.arcs.items(): - arcs.append((self._make_label(c, label), dfa.index(next))) - if state.isfinal: - arcs.append((0, dfa.index(state))) - states.append(arcs) - c.states.append(states) - c.dfas[c.symbol2number[name]] = (states, self._make_first(c, name)) - c.start = c.symbol2number[self.startsymbol] - return c - - def _make_first(self, c, name): - rawfirst = self.first[name] - first = {} - for label in rawfirst: - ilabel = self._make_label(c, label) - ##assert ilabel not in first # XXX failed on <> ... != - first[ilabel] = 1 - return first - - def _make_label(self, c, label): - # XXX Maybe this should be a method on a subclass of converter? 
- ilabel = len(c.labels) - if label[0].isalpha(): - # Either a symbol name or a named token - if label in c.symbol2number: - # A symbol name (a non-terminal) - if label in c.symbol2label: - return c.symbol2label[label] - else: - c.labels.append((c.symbol2number[label], None)) - c.symbol2label[label] = ilabel - c.label2symbol[ilabel] = label - return ilabel - else: - # A named token (NAME, NUMBER, STRING) - itoken = getattr(self._token_namespace, label, None) - assert isinstance(itoken, int), label - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - else: - # Either a keyword or an operator - assert label[0] in ('"', "'"), label - value = eval(label) - if value[0].isalpha(): - # A keyword - if value in c.keywords: - return c.keywords[value] - else: - # TODO this might be an issue?! Using token.NAME here? - c.labels.append((token.NAME, value)) - c.keywords[value] = ilabel - return ilabel - else: - # An operator (any non-numeric token) - itoken = self._token_namespace.generate_token_id(value) - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - - def _addfirstsets(self): - names = list(self.dfas.keys()) - names.sort() - for name in names: - if name not in self.first: - self._calcfirst(name) - #print name, self.first[name].keys() - - def _calcfirst(self, name): - dfa = self.dfas[name] - self.first[name] = None # dummy to detect left recursion - state = dfa[0] - totalset = {} - overlapcheck = {} - for label, next in state.arcs.items(): - if label in self.dfas: - if label in self.first: - fset = self.first[label] - if fset is None: - raise ValueError("recursion for rule %r" % name) - else: - self._calcfirst(label) - fset = self.first[label] - totalset.update(fset) - overlapcheck[label] = fset - else: - totalset[label] = 1 - overlapcheck[label] = {label: 1} - inverse = {} - for label, itsfirst in overlapcheck.items(): - for symbol in itsfirst: - if symbol in inverse: - raise ValueError("rule %s is ambiguous; %s is in the" - " first sets of %s as well as %s" % - (name, symbol, label, inverse[symbol])) - inverse[symbol] = label - self.first[name] = totalset - - def _parse(self): - dfas = {} - startsymbol = None - # MSTART: (NEWLINE | RULE)* ENDMARKER - while self.type != token.ENDMARKER: - while self.type == token.NEWLINE: - self._gettoken() - # RULE: NAME ':' RHS NEWLINE - name = self._expect(token.NAME) - self._expect(token.COLON) - a, z = self._parse_rhs() - self._expect(token.NEWLINE) - #self._dump_nfa(name, a, z) - dfa = self._make_dfa(a, z) - #self._dump_dfa(name, dfa) - # oldlen = len(dfa) - self._simplify_dfa(dfa) - # newlen = len(dfa) - dfas[name] = dfa - #print name, oldlen, newlen - if startsymbol is None: - startsymbol = name - return dfas, startsymbol - - def _make_dfa(self, start, finish): - # To turn an NFA into a DFA, we define the states of the DFA - # to correspond to *sets* of states of the NFA. Then do some - # state reduction. Let's represent sets as dicts with 1 for - # values. 
- assert isinstance(start, NFAState) - assert isinstance(finish, NFAState) - - def closure(state): - base = {} - addclosure(state, base) - return base - - def addclosure(state, base): - assert isinstance(state, NFAState) - if state in base: - return - base[state] = 1 - for label, next in state.arcs: - if label is None: - addclosure(next, base) - - states = [DFAState(closure(start), finish)] - for state in states: # NB states grows while we're iterating - arcs = {} - for nfastate in state.nfaset: - for label, next in nfastate.arcs: - if label is not None: - addclosure(next, arcs.setdefault(label, {})) - for label, nfaset in arcs.items(): - for st in states: - if st.nfaset == nfaset: - break - else: - st = DFAState(nfaset, finish) - states.append(st) - state.addarc(st, label) - return states # List of DFAState instances; first one is start - - def _dump_nfa(self, name, start, finish): - print("Dump of NFA for", name) - todo = [start] - for i, state in enumerate(todo): - print(" State", i, state is finish and "(final)" or "") - for label, next in state.arcs: - if next in todo: - j = todo.index(next) - else: - j = len(todo) - todo.append(next) - if label is None: - print(" -> %d" % j) - else: - print(" %s -> %d" % (label, j)) - - def _dump_dfa(self, name, dfa): - print("Dump of DFA for", name) - for i, state in enumerate(dfa): - print(" State", i, state.isfinal and "(final)" or "") - for label, next in state.arcs.items(): - print(" %s -> %d" % (label, dfa.index(next))) - - def _simplify_dfa(self, dfa): - # This is not theoretically optimal, but works well enough. - # Algorithm: repeatedly look for two states that have the same - # set of arcs (same labels pointing to the same nodes) and - # unify them, until things stop changing. - - # dfa is a list of DFAState instances - changes = True - while changes: - changes = False - for i, state_i in enumerate(dfa): - for j in range(i + 1, len(dfa)): - state_j = dfa[j] - if state_i == state_j: - #print " unify", i, j - del dfa[j] - for state in dfa: - state.unifystate(state_j, state_i) - changes = True - break - - def _parse_rhs(self): - # RHS: ALT ('|' ALT)* - a, z = self._parse_alt() - if self.value != "|": - return a, z - else: - aa = NFAState() - zz = NFAState() - aa.addarc(a) - z.addarc(zz) - while self.value == "|": - self._gettoken() - a, z = self._parse_alt() - aa.addarc(a) - z.addarc(zz) - return aa, zz - - def _parse_alt(self): - # ALT: ITEM+ - a, b = self._parse_item() - while (self.value in ("(", "[") or - self.type in (token.NAME, token.STRING)): - c, d = self._parse_item() - b.addarc(c) - b = d - return a, b - - def _parse_item(self): - # ITEM: '[' RHS ']' | ATOM ['+' | '*'] - if self.value == "[": - self._gettoken() - a, z = self._parse_rhs() - self._expect(token.RSQB) - a.addarc(z) - return a, z - else: - a, z = self._parse_atom() - value = self.value - if value not in ("+", "*"): - return a, z - self._gettoken() - z.addarc(a) - if value == "+": - return a, z - else: - return a, a - - def _parse_atom(self): - # ATOM: '(' RHS ')' | NAME | STRING - if self.value == "(": - self._gettoken() - a, z = self._parse_rhs() - self._expect(token.RPAR) - return a, z - elif self.type in (token.NAME, token.STRING): - a = NFAState() - z = NFAState() - a.addarc(z, self.value) - self._gettoken() - return a, z - else: - self._raise_error("expected (...) 
or NAME or STRING, got %s/%s", - self.type, self.value) - - def _expect(self, type): - if self.type != type: - self._raise_error("expected %s(%s), got %s(%s)", - type, token.tok_name[type], self.type, self.value) - value = self.value - self._gettoken() - return value - - def _gettoken(self): - tup = next(self.generator) - while tup[0] in (token.COMMENT, token.NL): - tup = next(self.generator) - self.type, self.value, self.begin, prefix = tup - - def _raise_error(self, msg, *args): - if args: - try: - msg = msg % args - except: - msg = " ".join([msg] + list(map(str, args))) - line = self._bnf_text.splitlines()[self.begin[0] - 1] - raise SyntaxError(msg, ('', self.begin[0], - self.begin[1], line)) - - -class NFAState(object): - def __init__(self): - self.arcs = [] # list of (label, NFAState) pairs - - def addarc(self, next, label=None): - assert label is None or isinstance(label, str) - assert isinstance(next, NFAState) - self.arcs.append((label, next)) - - -class DFAState(object): - def __init__(self, nfaset, final): - assert isinstance(nfaset, dict) - assert isinstance(next(iter(nfaset)), NFAState) - assert isinstance(final, NFAState) - self.nfaset = nfaset - self.isfinal = final in nfaset - self.arcs = {} # map from label to DFAState - - def addarc(self, next, label): - assert isinstance(label, str) - assert label not in self.arcs - assert isinstance(next, DFAState) - self.arcs[label] = next - - def unifystate(self, old, new): - for label, next in self.arcs.items(): - if next is old: - self.arcs[label] = new - - def __eq__(self, other): - # Equality test -- ignore the nfaset instance variable - assert isinstance(other, DFAState) - if self.isfinal != other.isfinal: - return False - # Can't just return self.arcs == other.arcs, because that - # would invoke this method recursively, with cycles... - if len(self.arcs) != len(other.arcs): - return False - for label, next in self.arcs.items(): - if next is not other.arcs.get(label): - return False - return True - - __hash__ = None # For Py3 compatibility. - - -def generate_grammar(bnf_text, token_namespace): - """ - ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for - at-least-once repetition, [] for optional parts, | for alternatives and () - for grouping). - - It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its - own parser. - """ - p = ParserGenerator(bnf_text, token_namespace) - return p.make_grammar() diff --git a/pythonFiles/parso/python/__init__.py b/pythonFiles/parso/python/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/pythonFiles/parso/python/diff.py b/pythonFiles/parso/python/diff.py deleted file mode 100644 index 96c6e5f2ca41..000000000000 --- a/pythonFiles/parso/python/diff.py +++ /dev/null @@ -1,593 +0,0 @@ -""" -Basically a contains parser that is faster, because it tries to parse only -parts and if anything changes, it only reparses the changed parts. - -It works with a simple diff in the beginning and will try to reuse old parser -fragments. 
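The incremental path described here is normally exercised through ``Grammar.parse(..., diff_cache=True)`` rather than by instantiating ``DiffParser`` directly; a rough sketch (``example.py`` is a placeholder path used only as the cache key):

import parso

grammar = parso.load_grammar(version='3.6')

module = grammar.parse('def f():\n    return 1\n', path='example.py', diff_cache=True)

# On the next parse only the changed lines should be re-parsed, and the
# same (mutated) module object is returned from the cache.
module2 = grammar.parse('def f():\n    return 2\n', path='example.py', diff_cache=True)
print(module is module2)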
-""" -import re -import difflib -from collections import namedtuple -import logging - -from parso.utils import split_lines -from parso.python.parser import Parser -from parso.python.tree import EndMarker -from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT, - ENDMARKER, INDENT, DEDENT) - -LOG = logging.getLogger(__name__) - - -def _get_last_line(node_or_leaf): - last_leaf = node_or_leaf.get_last_leaf() - if _ends_with_newline(last_leaf): - return last_leaf.start_pos[0] - else: - return last_leaf.end_pos[0] - - -def _ends_with_newline(leaf, suffix=''): - if leaf.type == 'error_leaf': - typ = leaf.original_type - else: - typ = leaf.type - - return typ == 'newline' or suffix.endswith('\n') - - -def _flows_finished(pgen_grammar, stack): - """ - if, while, for and try might not be finished, because another part might - still be parsed. - """ - for dfa, newstate, (symbol_number, nodes) in stack: - if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt', - 'for_stmt', 'try_stmt'): - return False - return True - - -def suite_or_file_input_is_valid(pgen_grammar, stack): - if not _flows_finished(pgen_grammar, stack): - return False - - for dfa, newstate, (symbol_number, nodes) in reversed(stack): - if pgen_grammar.number2symbol[symbol_number] == 'suite': - # If only newline is in the suite, the suite is not valid, yet. - return len(nodes) > 1 - # Not reaching a suite means that we're dealing with file_input levels - # where there's no need for a valid statement in it. It can also be empty. - return True - - -def _is_flow_node(node): - try: - value = node.children[0].value - except AttributeError: - return False - return value in ('if', 'for', 'while', 'try') - - -class _PositionUpdatingFinished(Exception): - pass - - -def _update_positions(nodes, line_offset, last_leaf): - for node in nodes: - try: - children = node.children - except AttributeError: - # Is a leaf - node.line += line_offset - if node is last_leaf: - raise _PositionUpdatingFinished - else: - _update_positions(children, line_offset, last_leaf) - - -class DiffParser(object): - """ - An advanced form of parsing a file faster. Unfortunately comes with huge - side effects. It changes the given module. - """ - def __init__(self, pgen_grammar, tokenizer, module): - self._pgen_grammar = pgen_grammar - self._tokenizer = tokenizer - self._module = module - - def _reset(self): - self._copy_count = 0 - self._parser_count = 0 - - self._nodes_stack = _NodesStack(self._module) - - def update(self, old_lines, new_lines): - ''' - The algorithm works as follows: - - Equal: - - Assure that the start is a newline, otherwise parse until we get - one. - - Copy from parsed_until_line + 1 to max(i2 + 1) - - Make sure that the indentation is correct (e.g. add DEDENT) - - Add old and change positions - Insert: - - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not - much more. - - Returns the new module node. - ''' - LOG.debug('diff parser start') - # Reset the used names cache so they get regenerated. 
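The opcodes consumed by the loop below come straight from the standard library; for two versions of a file split into lines (the snippets are arbitrary), ``difflib`` reports which line ranges are equal, replaced, inserted or deleted:

import difflib
from parso.utils import split_lines

old_lines = split_lines('def f():\n    return 1\n', keepends=True)
new_lines = split_lines('def f():\n    x = 2\n    return x\n', keepends=True)

sm = difflib.SequenceMatcher(None, old_lines, new_lines)
for operation, i1, i2, j1, j2 in sm.get_opcodes():
    # e.g. ('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 3), ('equal', 2, 3, 3, 4)
    print(operation, i1, i2, j1, j2)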
- self._module._used_names = None - - self._parser_lines_new = new_lines - - self._reset() - - line_length = len(new_lines) - sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new) - opcodes = sm.get_opcodes() - LOG.debug('diff parser calculated') - LOG.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length)) - - for operation, i1, i2, j1, j2 in opcodes: - LOG.debug('diff code[%s] old[%s:%s] new[%s:%s]', - operation, i1 + 1, i2, j1 + 1, j2) - - if j2 == line_length and new_lines[-1] == '': - # The empty part after the last newline is not relevant. - j2 -= 1 - - if operation == 'equal': - line_offset = j1 - i1 - self._copy_from_old_parser(line_offset, i2, j2) - elif operation == 'replace': - self._parse(until_line=j2) - elif operation == 'insert': - self._parse(until_line=j2) - else: - assert operation == 'delete' - - # With this action all change will finally be applied and we have a - # changed module. - self._nodes_stack.close() - - last_pos = self._module.end_pos[0] - if last_pos != line_length: - current_lines = split_lines(self._module.get_code(), keepends=True) - diff = difflib.unified_diff(current_lines, new_lines) - raise Exception( - "There's an issue (%s != %s) with the diff parser. Please report:\n%s" - % (last_pos, line_length, ''.join(diff)) - ) - - LOG.debug('diff parser end') - return self._module - - def _enabled_debugging(self, old_lines, lines_new): - if self._module.get_code() != ''.join(lines_new): - LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), - ''.join(lines_new)) - - def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new): - copied_nodes = [None] - - last_until_line = -1 - while until_line_new > self._nodes_stack.parsed_until_line: - parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset - line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1) - if line_stmt is None: - # Parse 1 line at least. We don't need more, because we just - # want to get into a state where the old parser has statements - # again that can be copied (e.g. not lines within parentheses). - self._parse(self._nodes_stack.parsed_until_line + 1) - elif not copied_nodes: - # We have copied as much as possible (but definitely not too - # much). Therefore we just parse the rest. - # We might not reach the end, because there's a statement - # that is not finished. - self._parse(until_line_new) - else: - p_children = line_stmt.parent.children - index = p_children.index(line_stmt) - - copied_nodes = self._nodes_stack.copy_nodes( - p_children[index:], - until_line_old, - line_offset - ) - # Match all the nodes that are in the wanted range. - if copied_nodes: - self._copy_count += 1 - - from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset - to = self._nodes_stack.parsed_until_line - - LOG.debug('diff actually copy %s to %s', from_, to) - # Since there are potential bugs that might loop here endlessly, we - # just stop here. - assert last_until_line != self._nodes_stack.parsed_until_line \ - or not copied_nodes, last_until_line - last_until_line = self._nodes_stack.parsed_until_line - - def _get_old_line_stmt(self, old_line): - leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True) - - if _ends_with_newline(leaf): - leaf = leaf.get_next_leaf() - if leaf.get_start_pos_of_prefix()[0] == old_line: - node = leaf - while node.parent.type not in ('file_input', 'suite'): - node = node.parent - return node - # Must be on the same line. Otherwise we need to parse that bit. 
- return None - - def _get_before_insertion_node(self): - if self._nodes_stack.is_empty(): - return None - - line = self._nodes_stack.parsed_until_line + 1 - node = self._new_module.get_last_leaf() - while True: - parent = node.parent - if parent.type in ('suite', 'file_input'): - assert node.end_pos[0] <= line - assert node.end_pos[1] == 0 or '\n' in self._prefix - return node - node = parent - - def _parse(self, until_line): - """ - Parses at least until the given line, but might just parse more until a - valid state is reached. - """ - last_until_line = 0 - while until_line > self._nodes_stack.parsed_until_line: - node = self._try_parse_part(until_line) - nodes = node.children - - self._nodes_stack.add_parsed_nodes(nodes) - LOG.debug( - 'parse_part from %s to %s (to %s in part parser)', - nodes[0].get_start_pos_of_prefix()[0], - self._nodes_stack.parsed_until_line, - node.end_pos[0] - 1 - ) - # Since the tokenizer sometimes has bugs, we cannot be sure that - # this loop terminates. Therefore assert that there's always a - # change. - assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line - last_until_line = self._nodes_stack.parsed_until_line - - def _try_parse_part(self, until_line): - """ - Sets up a normal parser that uses a spezialized tokenizer to only parse - until a certain position (or a bit longer if the statement hasn't - ended. - """ - self._parser_count += 1 - # TODO speed up, shouldn't copy the whole list all the time. - # memoryview? - parsed_until_line = self._nodes_stack.parsed_until_line - lines_after = self._parser_lines_new[parsed_until_line:] - #print('parse_content', parsed_until_line, lines_after, until_line) - tokens = self._diff_tokenize( - lines_after, - until_line, - line_offset=parsed_until_line - ) - self._active_parser = Parser( - self._pgen_grammar, - error_recovery=True - ) - return self._active_parser.parse(tokens=tokens) - - def _diff_tokenize(self, lines, until_line, line_offset=0): - is_first_token = True - omitted_first_indent = False - indents = [] - tokens = self._tokenizer(lines, (1, 0)) - stack = self._active_parser.pgen_parser.stack - for typ, string, start_pos, prefix in tokens: - start_pos = start_pos[0] + line_offset, start_pos[1] - if typ == INDENT: - indents.append(start_pos[1]) - if is_first_token: - omitted_first_indent = True - # We want to get rid of indents that are only here because - # we only parse part of the file. These indents would only - # get parsed as error leafs, which doesn't make any sense. - is_first_token = False - continue - is_first_token = False - - # In case of omitted_first_indent, it might not be dedented fully. - # However this is a sign for us that a dedent happened. - if typ == DEDENT \ - or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1: - indents.pop() - if omitted_first_indent and not indents: - # We are done here, only thing that can come now is an - # endmarker or another dedented code block. - typ, string, start_pos, prefix = next(tokens) - if '\n' in prefix: - prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix) - else: - prefix = '' - yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix) - break - elif typ == NEWLINE and start_pos[0] >= until_line: - yield PythonToken(typ, string, start_pos, prefix) - # Check if the parser is actually in a valid suite state. 
- if suite_or_file_input_is_valid(self._pgen_grammar, stack): - start_pos = start_pos[0] + 1, 0 - while len(indents) > int(omitted_first_indent): - indents.pop() - yield PythonToken(DEDENT, '', start_pos, '') - - yield PythonToken(ENDMARKER, '', start_pos, '') - break - else: - continue - - yield PythonToken(typ, string, start_pos, prefix) - - -class _NodesStackNode(object): - ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf') - - def __init__(self, tree_node, parent=None): - self.tree_node = tree_node - self.children_groups = [] - self.parent = parent - - def close(self): - children = [] - for children_part, line_offset, last_line_offset_leaf in self.children_groups: - if line_offset != 0: - try: - _update_positions( - children_part, line_offset, last_line_offset_leaf) - except _PositionUpdatingFinished: - pass - children += children_part - self.tree_node.children = children - # Reset the parents - for node in children: - node.parent = self.tree_node - - def add(self, children, line_offset=0, last_line_offset_leaf=None): - group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf) - self.children_groups.append(group) - - def get_last_line(self, suffix): - line = 0 - if self.children_groups: - children_group = self.children_groups[-1] - last_leaf = children_group.children[-1].get_last_leaf() - line = last_leaf.end_pos[0] - - # Calculate the line offsets - offset = children_group.line_offset - if offset: - # In case the line_offset is not applied to this specific leaf, - # just ignore it. - if last_leaf.line <= children_group.last_line_offset_leaf.line: - line += children_group.line_offset - - # Newlines end on the next line, which means that they would cover - # the next line. That line is not fully parsed at this point. - if _ends_with_newline(last_leaf, suffix): - line -= 1 - line += suffix.count('\n') - if suffix and not suffix.endswith('\n'): - # This is the end of a file (that doesn't end with a newline). - line += 1 - return line - - -class _NodesStack(object): - endmarker_type = 'endmarker' - - def __init__(self, module): - # Top of stack - self._tos = self._base_node = _NodesStackNode(module) - self._module = module - self._last_prefix = '' - self.prefix = '' - - def is_empty(self): - return not self._base_node.children - - @property - def parsed_until_line(self): - return self._tos.get_last_line(self.prefix) - - def _get_insertion_node(self, indentation_node): - indentation = indentation_node.start_pos[1] - - # find insertion node - node = self._tos - while True: - tree_node = node.tree_node - if tree_node.type == 'suite': - # A suite starts with NEWLINE, ... - node_indentation = tree_node.children[1].start_pos[1] - - if indentation >= node_indentation: # Not a Dedent - # We might be at the most outer layer: modules. We - # don't want to depend on the first statement - # having the right indentation. - return node - - elif tree_node.type == 'file_input': - return node - - node = self._close_tos() - - def _close_tos(self): - self._tos.close() - self._tos = self._tos.parent - return self._tos - - def add_parsed_nodes(self, tree_nodes): - tree_nodes = self._remove_endmarker(tree_nodes) - if not tree_nodes: - return - - assert tree_nodes[0].type != 'newline' - - node = self._get_insertion_node(tree_nodes[0]) - assert node.tree_node.type in ('suite', 'file_input') - node.add(tree_nodes) - self._update_tos(tree_nodes[-1]) - - def _remove_endmarker(self, tree_nodes): - """ - Helps cleaning up the tree nodes that get inserted. 
- """ - last_leaf = tree_nodes[-1].get_last_leaf() - is_endmarker = last_leaf.type == self.endmarker_type - self._last_prefix = '' - if is_endmarker: - try: - separation = last_leaf.prefix.rindex('\n') + 1 - except ValueError: - pass - else: - # Remove the whitespace part of the prefix after a newline. - # That is not relevant if parentheses were opened. Always parse - # until the end of a line. - last_leaf.prefix, self._last_prefix = \ - last_leaf.prefix[:separation], last_leaf.prefix[separation:] - - first_leaf = tree_nodes[0].get_first_leaf() - first_leaf.prefix = self.prefix + first_leaf.prefix - self.prefix = '' - - if is_endmarker: - self.prefix = last_leaf.prefix - - tree_nodes = tree_nodes[:-1] - return tree_nodes - - def copy_nodes(self, tree_nodes, until_line, line_offset): - """ - Copies tree nodes from the old parser tree. - - Returns the number of tree nodes that were copied. - """ - tos = self._get_insertion_node(tree_nodes[0]) - - new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset) - return new_nodes - - def _copy_nodes(self, tos, nodes, until_line, line_offset): - new_nodes = [] - - new_tos = tos - for node in nodes: - if node.type == 'endmarker': - # We basically removed the endmarker, but we are not allowed to - # remove the newline at the end of the line, otherwise it's - # going to be missing. - try: - self.prefix = node.prefix[:node.prefix.rindex('\n') + 1] - except ValueError: - pass - # Endmarkers just distort all the checks below. Remove them. - break - - if node.start_pos[0] > until_line: - break - # TODO this check might take a bit of time for large files. We - # might want to change this to do more intelligent guessing or - # binary search. - if _get_last_line(node) > until_line: - # We can split up functions and classes later. - if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite': - new_nodes.append(node) - break - - new_nodes.append(node) - - if not new_nodes: - return [], tos - - last_node = new_nodes[-1] - line_offset_index = -1 - if last_node.type in ('classdef', 'funcdef'): - suite = last_node.children[-1] - if suite.type == 'suite': - suite_tos = _NodesStackNode(suite) - # Don't need to pass line_offset here, it's already done by the - # parent. - suite_nodes, recursive_tos = self._copy_nodes( - suite_tos, suite.children, until_line, line_offset) - if len(suite_nodes) < 2: - # A suite only with newline is not valid. - new_nodes.pop() - else: - suite_tos.parent = tos - new_tos = recursive_tos - line_offset_index = -2 - - elif (new_nodes[-1].type in ('error_leaf', 'error_node') or - _is_flow_node(new_nodes[-1])): - # Error leafs/nodes don't have a defined start/end. Error - # nodes might not end with a newline (e.g. if there's an - # open `(`). Therefore ignore all of them unless they are - # succeeded with valid parser state. - # If we copy flows at the end, they might be continued - # after the copy limit (in the new parser). - # In this while loop we try to remove until we find a newline. - new_nodes.pop() - while new_nodes: - last_node = new_nodes[-1] - if last_node.get_last_leaf().type == 'newline': - break - new_nodes.pop() - - if new_nodes: - try: - last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf() - except IndexError: - line_offset = 0 - # In this case we don't have to calculate an offset, because - # there's no children to be managed. 
- last_line_offset_leaf = None - tos.add(new_nodes, line_offset, last_line_offset_leaf) - return new_nodes, new_tos - - def _update_tos(self, tree_node): - if tree_node.type in ('suite', 'file_input'): - self._tos = _NodesStackNode(tree_node, self._tos) - self._tos.add(list(tree_node.children)) - self._update_tos(tree_node.children[-1]) - elif tree_node.type in ('classdef', 'funcdef'): - self._update_tos(tree_node.children[-1]) - - def close(self): - while self._tos is not None: - self._close_tos() - - # Add an endmarker. - try: - last_leaf = self._module.get_last_leaf() - end_pos = list(last_leaf.end_pos) - except IndexError: - end_pos = [1, 0] - lines = split_lines(self.prefix) - assert len(lines) > 0 - if len(lines) == 1: - end_pos[1] += len(lines[0]) - else: - end_pos[0] += len(lines) - 1 - end_pos[1] = len(lines[-1]) - - endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix) - endmarker.parent = self._module - self._module.children.append(endmarker) diff --git a/pythonFiles/parso/python/errors.py b/pythonFiles/parso/python/errors.py deleted file mode 100644 index cfb8380ea743..000000000000 --- a/pythonFiles/parso/python/errors.py +++ /dev/null @@ -1,994 +0,0 @@ -# -*- coding: utf-8 -*- -import codecs -import warnings -import re -from contextlib import contextmanager - -from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule -from parso.python.tree import search_ancestor -from parso.parser import ParserSyntaxError - -_BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt') -_STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist') -# This is the maximal block size given by python. -_MAX_BLOCK_SIZE = 20 -_MAX_INDENT_COUNT = 100 -ALLOWED_FUTURES = ( - 'all_feature_names', 'nested_scopes', 'generators', 'division', - 'absolute_import', 'with_statement', 'print_function', 'unicode_literals', -) - - -def _iter_stmts(scope): - """ - Iterates over all statements and splits up simple_stmt. - """ - for child in scope.children: - if child.type == 'simple_stmt': - for child2 in child.children: - if child2.type == 'newline' or child2 == ';': - continue - yield child2 - else: - yield child - - -def _get_comprehension_type(atom): - first, second = atom.children[:2] - if second.type == 'testlist_comp' and second.children[1].type == 'comp_for': - if first == '[': - return 'list comprehension' - else: - return 'generator expression' - elif second.type == 'dictorsetmaker' and second.children[-1].type == 'comp_for': - if second.children[1] == ':': - return 'dict comprehension' - else: - return 'set comprehension' - return None - - -def _is_future_import(import_from): - # It looks like a __future__ import that is relative is still a future - # import. That feels kind of odd, but whatever. - # if import_from.level != 0: - # return False - from_names = import_from.get_from_names() - return [n.value for n in from_names] == ['__future__'] - - -def _remove_parens(atom): - """ - Returns the inner part of an expression like `(foo)`. Also removes nested - parens. - """ - try: - children = atom.children - except AttributeError: - pass - else: - if len(children) == 3 and children[0] == '(': - return _remove_parens(atom.children[1]) - return atom - - -def _iter_params(parent_node): - return (n for n in parent_node.children if n.type == 'param') - - -def _is_future_import_first(import_from): - """ - Checks if the import is the first statement of a file. 
- """ - found_docstring = False - for stmt in _iter_stmts(import_from.get_root_node()): - if stmt.type == 'string' and not found_docstring: - continue - found_docstring = True - - if stmt == import_from: - return True - if stmt.type == 'import_from' and _is_future_import(stmt): - continue - return False - - -def _iter_definition_exprs_from_lists(exprlist): - for child in exprlist.children[::2]: - if child.type == 'atom' and child.children[0] in ('(', '['): - testlist_comp = child.children[0] - if testlist_comp.type == 'testlist_comp': - for expr in _iter_definition_exprs_from_lists(testlist_comp): - yield expr - continue - elif child.children[0] == '[': - yield testlist_comp - continue - - yield child - -def _get_expr_stmt_definition_exprs(expr_stmt): - exprs = [] - for list_ in expr_stmt.children[:-2:2]: - if list_.type in ('testlist_star_expr', 'testlist'): - exprs += _iter_definition_exprs_from_lists(list_) - else: - exprs.append(list_) - return exprs - - -def _get_for_stmt_definition_exprs(for_stmt): - exprlist = for_stmt.children[1] - if exprlist.type != 'exprlist': - return [exprlist] - return list(_iter_definition_exprs_from_lists(exprlist)) - - -class _Context(object): - def __init__(self, node, add_syntax_error, parent_context=None): - self.node = node - self.blocks = [] - self.parent_context = parent_context - self._used_name_dict = {} - self._global_names = [] - self._nonlocal_names = [] - self._nonlocal_names_in_subscopes = [] - self._add_syntax_error = add_syntax_error - - def is_async_funcdef(self): - # Stupidly enough async funcdefs can have two different forms, - # depending if a decorator is used or not. - return self.is_function() \ - and self.node.parent.type in ('async_funcdef', 'async_stmt') - - def is_function(self): - return self.node.type == 'funcdef' - - def add_name(self, name): - parent_type = name.parent.type - if parent_type == 'trailer': - # We are only interested in first level names. - return - - if parent_type == 'global_stmt': - self._global_names.append(name) - elif parent_type == 'nonlocal_stmt': - self._nonlocal_names.append(name) - else: - self._used_name_dict.setdefault(name.value, []).append(name) - - def finalize(self): - """ - Returns a list of nonlocal names that need to be part of that scope. - """ - self._analyze_names(self._global_names, 'global') - self._analyze_names(self._nonlocal_names, 'nonlocal') - - # Python2.6 doesn't have dict comprehensions. 
- global_name_strs = dict((n.value, n) for n in self._global_names) - for nonlocal_name in self._nonlocal_names: - try: - global_name = global_name_strs[nonlocal_name.value] - except KeyError: - continue - - message = "name '%s' is nonlocal and global" % global_name.value - if global_name.start_pos < nonlocal_name.start_pos: - error_name = global_name - else: - error_name = nonlocal_name - self._add_syntax_error(error_name, message) - - nonlocals_not_handled = [] - for nonlocal_name in self._nonlocal_names_in_subscopes: - search = nonlocal_name.value - if search in global_name_strs or self.parent_context is None: - message = "no binding for nonlocal '%s' found" % nonlocal_name.value - self._add_syntax_error(nonlocal_name, message) - elif not self.is_function() or \ - nonlocal_name.value not in self._used_name_dict: - nonlocals_not_handled.append(nonlocal_name) - return self._nonlocal_names + nonlocals_not_handled - - def _analyze_names(self, globals_or_nonlocals, type_): - def raise_(message): - self._add_syntax_error(base_name, message % (base_name.value, type_)) - - params = [] - if self.node.type == 'funcdef': - params = self.node.get_params() - - for base_name in globals_or_nonlocals: - found_global_or_nonlocal = False - # Somehow Python does it the reversed way. - for name in reversed(self._used_name_dict.get(base_name.value, [])): - if name.start_pos > base_name.start_pos: - # All following names don't have to be checked. - found_global_or_nonlocal = True - - parent = name.parent - if parent.type == 'param' and parent.name == name: - # Skip those here, these definitions belong to the next - # scope. - continue - - if name.is_definition(): - if parent.type == 'expr_stmt' \ - and parent.children[1].type == 'annassign': - if found_global_or_nonlocal: - # If it's after the global the error seems to be - # placed there. - base_name = name - raise_("annotated name '%s' can't be %s") - break - else: - message = "name '%s' is assigned to before %s declaration" - else: - message = "name '%s' is used prior to %s declaration" - - if not found_global_or_nonlocal: - raise_(message) - # Only add an error for the first occurence. - break - - for param in params: - if param.name.value == base_name.value: - raise_("name '%s' is parameter and %s"), - - @contextmanager - def add_block(self, node): - self.blocks.append(node) - yield - self.blocks.pop() - - def add_context(self, node): - return _Context(node, self._add_syntax_error, parent_context=self) - - def close_child_context(self, child_context): - self._nonlocal_names_in_subscopes += child_context.finalize() - - -class ErrorFinder(Normalizer): - """ - Searches for errors in the syntax tree. - """ - def __init__(self, *args, **kwargs): - super(ErrorFinder, self).__init__(*args, **kwargs) - self._error_dict = {} - self.version = self.grammar.version_info - - def initialize(self, node): - def create_context(node): - if node is None: - return None - - parent_context = create_context(node.parent) - if node.type in ('classdef', 'funcdef', 'file_input'): - return _Context(node, self._add_syntax_error, parent_context) - return parent_context - - self.context = create_context(node) or _Context(node, self._add_syntax_error) - self._indentation_count = 0 - - def visit(self, node): - if node.type == 'error_node': - with self.visit_node(node): - # Don't need to investigate the inners of an error node. We - # might find errors in there that should be ignored, because - # the error node itself already shows that there's an issue. 
- return '' - return super(ErrorFinder, self).visit(node) - - - @contextmanager - def visit_node(self, node): - self._check_type_rules(node) - - if node.type in _BLOCK_STMTS: - with self.context.add_block(node): - if len(self.context.blocks) == _MAX_BLOCK_SIZE: - self._add_syntax_error(node, "too many statically nested blocks") - yield - return - elif node.type == 'suite': - self._indentation_count += 1 - if self._indentation_count == _MAX_INDENT_COUNT: - self._add_indentation_error(node.children[1], "too many levels of indentation") - - yield - - if node.type == 'suite': - self._indentation_count -= 1 - elif node.type in ('classdef', 'funcdef'): - context = self.context - self.context = context.parent_context - self.context.close_child_context(context) - - def visit_leaf(self, leaf): - if leaf.type == 'error_leaf': - if leaf.original_type in ('indent', 'error_dedent'): - # Indents/Dedents itself never have a prefix. They are just - # "pseudo" tokens that get removed by the syntax tree later. - # Therefore in case of an error we also have to check for this. - spacing = list(leaf.get_next_leaf()._split_prefix())[-1] - if leaf.original_type == 'indent': - message = 'unexpected indent' - else: - message = 'unindent does not match any outer indentation level' - self._add_indentation_error(spacing, message) - else: - if leaf.value.startswith('\\'): - message = 'unexpected character after line continuation character' - else: - match = re.match('\\w{,2}("{1,3}|\'{1,3})', leaf.value) - if match is None: - message = 'invalid syntax' - else: - if len(match.group(1)) == 1: - message = 'EOL while scanning string literal' - else: - message = 'EOF while scanning triple-quoted string literal' - self._add_syntax_error(leaf, message) - return '' - elif leaf.value == ':': - parent = leaf.parent - if parent.type in ('classdef', 'funcdef'): - self.context = self.context.add_context(parent) - - # The rest is rule based. - return super(ErrorFinder, self).visit_leaf(leaf) - - def _add_indentation_error(self, spacing, message): - self.add_issue(spacing, 903, "IndentationError: " + message) - - def _add_syntax_error(self, node, message): - self.add_issue(node, 901, "SyntaxError: " + message) - - def add_issue(self, node, code, message): - # Overwrite the default behavior. - # Check if the issues are on the same line. - line = node.start_pos[0] - args = (code, message, node) - self._error_dict.setdefault(line, args) - - def finalize(self): - self.context.finalize() - - for code, message, node in self._error_dict.values(): - self.issues.append(Issue(node, code, message)) - - -class IndentationRule(Rule): - code = 903 - - def _get_message(self, message): - message = super(IndentationRule, self)._get_message(message) - return "IndentationError: " + message - - -@ErrorFinder.register_rule(type='error_node') -class _ExpectIndentedBlock(IndentationRule): - message = 'expected an indented block' - - def get_node(self, node): - leaf = node.get_next_leaf() - return list(leaf._split_prefix())[-1] - - def is_issue(self, node): - # This is the beginning of a suite that is not indented. 
- return node.children[-1].type == 'newline' - - -class ErrorFinderConfig(NormalizerConfig): - normalizer_class = ErrorFinder - - -class SyntaxRule(Rule): - code = 901 - - def _get_message(self, message): - message = super(SyntaxRule, self)._get_message(message) - return "SyntaxError: " + message - - -@ErrorFinder.register_rule(type='error_node') -class _InvalidSyntaxRule(SyntaxRule): - message = "invalid syntax" - - def get_node(self, node): - return node.get_next_leaf() - - def is_issue(self, node): - # Error leafs will be added later as an error. - return node.get_next_leaf().type != 'error_leaf' - - -@ErrorFinder.register_rule(value='await') -class _AwaitOutsideAsync(SyntaxRule): - message = "'await' outside async function" - - def is_issue(self, leaf): - return not self._normalizer.context.is_async_funcdef() - - def get_error_node(self, node): - # Return the whole await statement. - return node.parent - - -@ErrorFinder.register_rule(value='break') -class _BreakOutsideLoop(SyntaxRule): - message = "'break' outside loop" - - def is_issue(self, leaf): - in_loop = False - for block in self._normalizer.context.blocks: - if block.type in ('for_stmt', 'while_stmt'): - in_loop = True - return not in_loop - - -@ErrorFinder.register_rule(value='continue') -class _ContinueChecks(SyntaxRule): - message = "'continue' not properly in loop" - message_in_finally = "'continue' not supported inside 'finally' clause" - - def is_issue(self, leaf): - in_loop = False - for block in self._normalizer.context.blocks: - if block.type in ('for_stmt', 'while_stmt'): - in_loop = True - if block.type == 'try_stmt': - last_block = block.children[-3] - if last_block == 'finally' and leaf.start_pos > last_block.start_pos: - self.add_issue(leaf, message=self.message_in_finally) - return False # Error already added - if not in_loop: - return True - - -@ErrorFinder.register_rule(value='from') -class _YieldFromCheck(SyntaxRule): - message = "'yield from' inside async function" - - def get_node(self, leaf): - return leaf.parent.parent # This is the actual yield statement. - - def is_issue(self, leaf): - return leaf.parent.type == 'yield_arg' \ - and self._normalizer.context.is_async_funcdef() - - -@ErrorFinder.register_rule(type='name') -class _NameChecks(SyntaxRule): - message = 'cannot assign to __debug__' - message_keyword = 'assignment to keyword' - message_none = 'cannot assign to None' - - def is_issue(self, leaf): - self._normalizer.context.add_name(leaf) - - if leaf.value == '__debug__' and leaf.is_definition(): - if self._normalizer.version < (3, 0): - return True - else: - self.add_issue(leaf, message=self.message_keyword) - if leaf.value == 'None' and self._normalizer.version < (3, 0) \ - and leaf.is_definition(): - self.add_issue(leaf, message=self.message_none) - - -@ErrorFinder.register_rule(type='string') -class _StringChecks(SyntaxRule): - message = "bytes can only contain ASCII literal characters." - - def is_issue(self, leaf): - string_prefix = leaf.string_prefix.lower() - if 'b' in string_prefix \ - and self._normalizer.version >= (3, 0) \ - and any(c for c in leaf.value if ord(c) > 127): - # b'ä' - return True - - if 'r' not in string_prefix: - # Raw strings don't need to be checked if they have proper - # escaping. 
- is_bytes = self._normalizer.version < (3, 0) - if 'b' in string_prefix: - is_bytes = True - if 'u' in string_prefix: - is_bytes = False - - payload = leaf._get_payload() - if is_bytes: - payload = payload.encode('utf-8') - func = codecs.escape_decode - else: - func = codecs.unicode_escape_decode - - try: - with warnings.catch_warnings(): - # The warnings from parsing strings are not relevant. - warnings.filterwarnings('ignore') - func(payload) - except UnicodeDecodeError as e: - self.add_issue(leaf, message='(unicode error) ' + str(e)) - except ValueError as e: - self.add_issue(leaf, message='(value error) ' + str(e)) - - -@ErrorFinder.register_rule(value='*') -class _StarCheck(SyntaxRule): - message = "named arguments must follow bare *" - - def is_issue(self, leaf): - params = leaf.parent - if params.type == 'parameters' and params: - after = params.children[params.children.index(leaf) + 1:] - after = [child for child in after - if child not in (',', ')') and not child.star_count] - return len(after) == 0 - - -@ErrorFinder.register_rule(value='**') -class _StarStarCheck(SyntaxRule): - # e.g. {**{} for a in [1]} - # TODO this should probably get a better end_pos including - # the next sibling of leaf. - message = "dict unpacking cannot be used in dict comprehension" - - def is_issue(self, leaf): - if leaf.parent.type == 'dictorsetmaker': - comp_for = leaf.get_next_sibling().get_next_sibling() - return comp_for is not None and comp_for.type == 'comp_for' - - -@ErrorFinder.register_rule(value='yield') -@ErrorFinder.register_rule(value='return') -class _ReturnAndYieldChecks(SyntaxRule): - message = "'return' with value in async generator" - message_async_yield = "'yield' inside async function" - - def get_node(self, leaf): - return leaf.parent - - def is_issue(self, leaf): - if self._normalizer.context.node.type != 'funcdef': - self.add_issue(self.get_node(leaf), message="'%s' outside function" % leaf.value) - elif self._normalizer.context.is_async_funcdef() \ - and any(self._normalizer.context.node.iter_yield_exprs()): - if leaf.value == 'return' and leaf.parent.type == 'return_stmt': - return True - elif leaf.value == 'yield' \ - and leaf.get_next_leaf() != 'from' \ - and self._normalizer.version == (3, 5): - self.add_issue(self.get_node(leaf), message=self.message_async_yield) - - -@ErrorFinder.register_rule(type='strings') -class _BytesAndStringMix(SyntaxRule): - # e.g. 's' b'' - message = "cannot mix bytes and nonbytes literals" - - def _is_bytes_literal(self, string): - return 'b' in string.string_prefix.lower() - - def is_issue(self, node): - first = node.children[0] - if first.type == 'string' and self._normalizer.version >= (3, 0): - first_is_bytes = self._is_bytes_literal(first) - for string in node.children[1:]: - if first_is_bytes != self._is_bytes_literal(string): - return True - - -@ErrorFinder.register_rule(type='import_as_names') -class _TrailingImportComma(SyntaxRule): - # e.g. 
from foo import a, - message = "trailing comma not allowed without surrounding parentheses" - - def is_issue(self, node): - if node.children[-1] == ',': - return True - - -@ErrorFinder.register_rule(type='import_from') -class _ImportStarInFunction(SyntaxRule): - message = "import * only allowed at module level" - - def is_issue(self, node): - return node.is_star_import() and self._normalizer.context.parent_context is not None - - -@ErrorFinder.register_rule(type='import_from') -class _FutureImportRule(SyntaxRule): - message = "from __future__ imports must occur at the beginning of the file" - - def is_issue(self, node): - if _is_future_import(node): - if not _is_future_import_first(node): - return True - - for from_name, future_name in node.get_paths(): - name = future_name.value - allowed_futures = list(ALLOWED_FUTURES) - if self._normalizer.version >= (3, 5): - allowed_futures.append('generator_stop') - - if name == 'braces': - self.add_issue(node, message = "not a chance") - elif name == 'barry_as_FLUFL': - m = "Seriously I'm not implementing this :) ~ Dave" - self.add_issue(node, message=m) - elif name not in ALLOWED_FUTURES: - message = "future feature %s is not defined" % name - self.add_issue(node, message=message) - - -@ErrorFinder.register_rule(type='star_expr') -class _StarExprRule(SyntaxRule): - message = "starred assignment target must be in a list or tuple" - message_iterable_unpacking = "iterable unpacking cannot be used in comprehension" - message_assignment = "can use starred expression only as assignment target" - - def is_issue(self, node): - if node.parent.type not in _STAR_EXPR_PARENTS: - return True - if node.parent.type == 'testlist_comp': - # [*[] for a in [1]] - if node.parent.children[1].type == 'comp_for': - self.add_issue(node, message=self.message_iterable_unpacking) - if self._normalizer.version <= (3, 4): - n = search_ancestor(node, 'for_stmt', 'expr_stmt') - found_definition = False - if n is not None: - if n.type == 'expr_stmt': - exprs = _get_expr_stmt_definition_exprs(n) - else: - exprs = _get_for_stmt_definition_exprs(n) - if node in exprs: - found_definition = True - - if not found_definition: - self.add_issue(node, message=self.message_assignment) - - -@ErrorFinder.register_rule(types=_STAR_EXPR_PARENTS) -class _StarExprParentRule(SyntaxRule): - def is_issue(self, node): - if node.parent.type == 'del_stmt': - self.add_issue(node.parent, message="can't use starred expression here") - else: - def is_definition(node, ancestor): - if ancestor is None: - return False - - type_ = ancestor.type - if type_ == 'trailer': - return False - - if type_ == 'expr_stmt': - return node.start_pos < ancestor.children[-1].start_pos - - return is_definition(node, ancestor.parent) - - if is_definition(node, node.parent): - args = [c for c in node.children if c != ','] - starred = [c for c in args if c.type == 'star_expr'] - if len(starred) > 1: - message = "two starred expressions in assignment" - self.add_issue(starred[1], message=message) - elif starred: - count = args.index(starred[0]) - if count >= 256: - message = "too many expressions in star-unpacking assignment" - self.add_issue(starred[0], message=message) - - -@ErrorFinder.register_rule(type='annassign') -class _AnnotatorRule(SyntaxRule): - # True: int - # {}: float - message = "illegal target for annotation" - - def get_node(self, node): - return node.parent - - def is_issue(self, node): - type_ = None - lhs = node.parent.children[0] - lhs = _remove_parens(lhs) - try: - children = lhs.children - except 
AttributeError: - pass - else: - if ',' in children or lhs.type == 'atom' and children[0] == '(': - type_ = 'tuple' - elif lhs.type == 'atom' and children[0] == '[': - type_ = 'list' - trailer = children[-1] - - if type_ is None: - if not (lhs.type == 'name' - # subscript/attributes are allowed - or lhs.type in ('atom_expr', 'power') - and trailer.type == 'trailer' - and trailer.children[0] != '('): - return True - else: - # x, y: str - message = "only single target (not %s) can be annotated" - self.add_issue(lhs.parent, message=message % type_) - - -@ErrorFinder.register_rule(type='argument') -class _ArgumentRule(SyntaxRule): - def is_issue(self, node): - first = node.children[0] - if node.children[1] == '=' and first.type != 'name': - if first.type == 'lambdef': - # f(lambda: 1=1) - message = "lambda cannot contain assignment" - else: - # f(+x=1) - message = "keyword can't be an expression" - self.add_issue(first, message=message) - - -@ErrorFinder.register_rule(type='nonlocal_stmt') -class _NonlocalModuleLevelRule(SyntaxRule): - message = "nonlocal declaration not allowed at module level" - - def is_issue(self, node): - return self._normalizer.context.parent_context is None - - -@ErrorFinder.register_rule(type='arglist') -class _ArglistRule(SyntaxRule): - @property - def message(self): - if self._normalizer.version < (3, 7): - return "Generator expression must be parenthesized if not sole argument" - else: - return "Generator expression must be parenthesized" - - def is_issue(self, node): - first_arg = node.children[0] - if first_arg.type == 'argument' \ - and first_arg.children[1].type == 'comp_for': - # e.g. foo(x for x in [], b) - return len(node.children) >= 2 - else: - arg_set = set() - kw_only = False - kw_unpacking_only = False - is_old_starred = False - # In python 3 this would be a bit easier (stars are part of - # argument), but we have to understand both. - for argument in node.children: - if argument == ',': - continue - - if argument in ('*', '**'): - # Python < 3.5 has the order engraved in the grammar - # file. No need to do anything here. - is_old_starred = True - continue - if is_old_starred: - is_old_starred = False - continue - - if argument.type == 'argument': - first = argument.children[0] - if first in ('*', '**'): - if first == '*': - if kw_unpacking_only: - # foo(**kwargs, *args) - message = "iterable argument unpacking follows keyword argument unpacking" - self.add_issue(argument, message=message) - else: - kw_unpacking_only = True - else: # Is a keyword argument. 
- kw_only = True - if first.type == 'name': - if first.value in arg_set: - # f(x=1, x=2) - self.add_issue(first, message="keyword argument repeated") - else: - arg_set.add(first.value) - else: - if kw_unpacking_only: - # f(**x, y) - message = "positional argument follows keyword argument unpacking" - self.add_issue(argument, message=message) - elif kw_only: - # f(x=2, y) - message = "positional argument follows keyword argument" - self.add_issue(argument, message=message) - -@ErrorFinder.register_rule(type='parameters') -@ErrorFinder.register_rule(type='lambdef') -class _ParameterRule(SyntaxRule): - # def f(x=3, y): pass - message = "non-default argument follows default argument" - - def is_issue(self, node): - param_names = set() - default_only = False - for p in _iter_params(node): - if p.name.value in param_names: - message = "duplicate argument '%s' in function definition" - self.add_issue(p.name, message=message % p.name.value) - param_names.add(p.name.value) - - if p.default is None and not p.star_count: - if default_only: - return True - else: - default_only = True - - -@ErrorFinder.register_rule(type='try_stmt') -class _TryStmtRule(SyntaxRule): - message = "default 'except:' must be last" - - def is_issue(self, try_stmt): - default_except = None - for except_clause in try_stmt.children[3::3]: - if except_clause in ('else', 'finally'): - break - if except_clause == 'except': - default_except = except_clause - elif default_except is not None: - self.add_issue(default_except, message=self.message) - - -@ErrorFinder.register_rule(type='fstring') -class _FStringRule(SyntaxRule): - _fstring_grammar = None - message_nested = "f-string: expressions nested too deeply" - message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'" - - def _check_format_spec(self, format_spec, depth): - self._check_fstring_contents(format_spec.children[1:], depth) - - def _check_fstring_expr(self, fstring_expr, depth): - if depth >= 2: - self.add_issue(fstring_expr, message=self.message_nested) - - conversion = fstring_expr.children[2] - if conversion.type == 'fstring_conversion': - name = conversion.children[1] - if name.value not in ('s', 'r', 'a'): - self.add_issue(name, message=self.message_conversion) - - format_spec = fstring_expr.children[-2] - if format_spec.type == 'fstring_format_spec': - self._check_format_spec(format_spec, depth + 1) - - def is_issue(self, fstring): - self._check_fstring_contents(fstring.children[1:-1]) - - def _check_fstring_contents(self, children, depth=0): - for fstring_content in children: - if fstring_content.type == 'fstring_expr': - self._check_fstring_expr(fstring_content, depth) - - -class _CheckAssignmentRule(SyntaxRule): - def _check_assignment(self, node, is_deletion=False): - error = None - type_ = node.type - if type_ == 'lambdef': - error = 'lambda' - elif type_ == 'atom': - first, second = node.children[:2] - error = _get_comprehension_type(node) - if error is None: - if second.type == 'dictorsetmaker': - error = 'literal' - elif first in ('(', '['): - if second.type == 'yield_expr': - error = 'yield expression' - elif second.type == 'testlist_comp': - # This is not a comprehension, they were handled - # further above. - for child in second.children[::2]: - self._check_assignment(child, is_deletion) - else: # Everything handled, must be useless brackets. 
- self._check_assignment(second, is_deletion) - elif type_ == 'keyword': - error = 'keyword' - elif type_ == 'operator': - if node.value == '...': - error = 'Ellipsis' - elif type_ == 'comparison': - error = 'comparison' - elif type_ in ('string', 'number', 'strings'): - error = 'literal' - elif type_ == 'yield_expr': - # This one seems to be a slightly different warning in Python. - message = 'assignment to yield expression not possible' - self.add_issue(node, message=message) - elif type_ == 'test': - error = 'conditional expression' - elif type_ in ('atom_expr', 'power'): - if node.children[0] == 'await': - error = 'await expression' - elif node.children[-2] == '**': - error = 'operator' - else: - # Has a trailer - trailer = node.children[-1] - assert trailer.type == 'trailer' - if trailer.children[0] == '(': - error = 'function call' - elif type_ in ('testlist_star_expr', 'exprlist', 'testlist'): - for child in node.children[::2]: - self._check_assignment(child, is_deletion) - elif ('expr' in type_ and type_ != 'star_expr' # is a substring - or '_test' in type_ - or type_ in ('term', 'factor')): - error = 'operator' - - if error is not None: - message = "can't %s %s" % ("delete" if is_deletion else "assign to", error) - self.add_issue(node, message=message) - - -@ErrorFinder.register_rule(type='comp_for') -class _CompForRule(_CheckAssignmentRule): - message = "asynchronous comprehension outside of an asynchronous function" - - def is_issue(self, node): - # Some of the nodes here are already used, so no else if - expr_list = node.children[1 + int(node.children[0] == 'async')] - if expr_list.type != 'expr_list': # Already handled. - self._check_assignment(expr_list) - - return node.children[0] == 'async' \ - and not self._normalizer.context.is_async_funcdef() - - -@ErrorFinder.register_rule(type='expr_stmt') -class _ExprStmtRule(_CheckAssignmentRule): - message = "illegal expression for augmented assignment" - - def is_issue(self, node): - for before_equal in node.children[:-2:2]: - self._check_assignment(before_equal) - - augassign = node.children[1] - if augassign != '=' and augassign.type != 'annassign': # Is augassign. - return node.children[0].type in ('testlist_star_expr', 'atom', 'testlist') - - -@ErrorFinder.register_rule(type='with_item') -class _WithItemRule(_CheckAssignmentRule): - def is_issue(self, with_item): - self._check_assignment(with_item.children[2]) - - -@ErrorFinder.register_rule(type='del_stmt') -class _DelStmtRule(_CheckAssignmentRule): - def is_issue(self, del_stmt): - child = del_stmt.children[1] - - if child.type != 'expr_list': # Already handled. - self._check_assignment(child, is_deletion=True) - - -@ErrorFinder.register_rule(type='expr_list') -class _ExprListRule(_CheckAssignmentRule): - def is_issue(self, expr_list): - for expr in expr_list.children[::2]: - self._check_assignment(expr) - - -@ErrorFinder.register_rule(type='for_stmt') -class _ForStmtRule(_CheckAssignmentRule): - def is_issue(self, for_stmt): - # Some of the nodes here are already used, so no else if - expr_list = for_stmt.children[1] - if expr_list.type != 'expr_list': # Already handled. 
- self._check_assignment(expr_list) diff --git a/pythonFiles/parso/python/grammar26.txt b/pythonFiles/parso/python/grammar26.txt deleted file mode 100644 index d9cede2e9da9..000000000000 --- a/pythonFiles/parso/python/grammar26.txt +++ /dev/null @@ -1,159 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed in PEP 306, -# "How to Change Python's Grammar" - -# Commands for Kees Blom's railroad program -#diagram:token NAME -#diagram:token NUMBER -#diagram:token STRING -#diagram:token NEWLINE -#diagram:token ENDMARKER -#diagram:token INDENT -#diagram:output\input python.bla -#diagram:token DEDENT -#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm -#diagram:rules - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() and input() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: (NEWLINE | stmt)* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ':' suite -parameters: '(' [varargslist] ')' -varargslist: ((fpdef ['=' test] ',')* - ('*' NAME [',' '**' NAME] | '**' NAME) | - fpdef ['=' test] (',' fpdef ['=' test])* [',']) -fpdef: NAME | '(' fplist ')' -fplist: fpdef (',' fpdef)* [','] - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | exec_stmt | assert_stmt) -expr_stmt: testlist (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist))*) -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -print_stmt: 'print' ( [ test (',' test)* [','] ] | - '>>' test [ (',' test)+ [','] ] ) -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test [',' test [',' test]]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -import_from: ('from' ('.'* dotted_name | '.'+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' 
NAME)* -global_stmt: 'global' NAME (',' NAME)* -exec_stmt: 'exec' expr ['in' test [',' test]] -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item ':' suite -# Dave: Python2.6 actually defines a little bit of a different label called -# 'with_var'. However in 2.7+ this is the default. Apply it for -# consistency reasons. -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test [('as' | ',') test]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -# Backward compatibility cruft to support: -# [ x for x in lambda: True, lambda: False if x() ] -# even while also allowing: -# lambda x: 5 if x else 2 -# (But not a mix of the two) -testlist_safe: old_test [(',' old_test)+ [',']] -old_test: or_test | old_lambdef -old_lambdef: 'lambda' [varargslist] ':' old_test - -test: or_test ['if' or_test 'else' test] | lambdef -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [listmaker] ']' | - '{' [dictorsetmaker] '}' | - '`' testlist1 '`' | - NAME | NUMBER | strings) -strings: STRING+ -listmaker: test ( list_for | (',' test)* [','] ) -# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the -# default. It's more consistent like this. -testlist_comp: test ( gen_for | (',' test)* [','] ) -lambdef: 'lambda' [varargslist] ':' test -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: expr (',' expr)* [','] -testlist: test (',' test)* [','] -# Dave: Rename from dictmaker to dictorsetmaker, because this is more -# consistent with the following grammars. 
-dictorsetmaker: test ':' test (',' test ':' test)* [','] - -classdef: 'class' NAME ['(' [testlist] ')'] ':' suite - -arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] - |'**' test) -argument: test [gen_for] | test '=' test # Really [keyword '='] test - -list_iter: list_for | list_if -list_for: 'for' exprlist 'in' testlist_safe [list_iter] -list_if: 'if' old_test [list_iter] - -gen_iter: gen_for | gen_if -gen_for: 'for' exprlist 'in' or_test [gen_iter] -gen_if: 'if' old_test [gen_iter] - -testlist1: test (',' test)* - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [testlist] diff --git a/pythonFiles/parso/python/grammar27.txt b/pythonFiles/parso/python/grammar27.txt deleted file mode 100644 index 359f12b43e1f..000000000000 --- a/pythonFiles/parso/python/grammar27.txt +++ /dev/null @@ -1,143 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed in PEP 306, -# "How to Change Python's Grammar" - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() and input() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: (NEWLINE | stmt)* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ':' suite -parameters: '(' [varargslist] ')' -varargslist: ((fpdef ['=' test] ',')* - ('*' NAME [',' '**' NAME] | '**' NAME) | - fpdef ['=' test] (',' fpdef ['=' test])* [',']) -fpdef: NAME | '(' fplist ')' -fplist: fpdef (',' fpdef)* [','] - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | exec_stmt | assert_stmt) -expr_stmt: testlist (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist))*) -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -print_stmt: 'print' ( [ test (',' test)* [','] ] | - '>>' test [ (',' test)+ [','] ] ) -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test [',' test [',' test]]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -import_from: ('from' ('.'* dotted_name | '.'+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' 
NAME)* -global_stmt: 'global' NAME (',' NAME)* -exec_stmt: 'exec' expr ['in' test [',' test]] -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test [('as' | ',') test]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -# Backward compatibility cruft to support: -# [ x for x in lambda: True, lambda: False if x() ] -# even while also allowing: -# lambda x: 5 if x else 2 -# (But not a mix of the two) -testlist_safe: old_test [(',' old_test)+ [',']] -old_test: or_test | old_lambdef -old_lambdef: 'lambda' [varargslist] ':' old_test - -test: or_test ['if' or_test 'else' test] | lambdef -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [listmaker] ']' | - '{' [dictorsetmaker] '}' | - '`' testlist1 '`' | - NAME | NUMBER | strings) -strings: STRING+ -listmaker: test ( list_for | (',' test)* [','] ) -testlist_comp: test ( comp_for | (',' test)* [','] ) -lambdef: 'lambda' [varargslist] ':' test -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: expr (',' expr)* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | - (test (comp_for | (',' test)* [','])) ) - -classdef: 'class' NAME ['(' [testlist] ')'] ':' suite - -arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] - |'**' test) -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. 
-argument: test [comp_for] | test '=' test - -list_iter: list_for | list_if -list_for: 'for' exprlist 'in' testlist_safe [list_iter] -list_if: 'if' old_test [list_iter] - -comp_iter: comp_for | comp_if -comp_for: 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' old_test [comp_iter] - -testlist1: test (',' test)* - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [testlist] diff --git a/pythonFiles/parso/python/grammar33.txt b/pythonFiles/parso/python/grammar33.txt deleted file mode 100644 index 3a5580926797..000000000000 --- a/pythonFiles/parso/python/grammar33.txt +++ /dev/null @@ -1,134 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed in PEP 306, -# "How to Change Python's Grammar" - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: (NEWLINE | stmt)* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ['->' test] ':' suite -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' - ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' - ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' 
| '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. It's here for the -# sake of a __future__ import described in PEP 401 -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') -strings: STRING+ -testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | - (test (comp_for | (',' test)* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] - |'**' test) -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. 
-argument: test [comp_for] | test '=' test # Really [keyword '='] test -comp_iter: comp_for | comp_if -comp_for: 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' test_nocond [comp_iter] - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [yield_arg] -yield_arg: 'from' test | testlist diff --git a/pythonFiles/parso/python/grammar34.txt b/pythonFiles/parso/python/grammar34.txt deleted file mode 100644 index 324bba18753d..000000000000 --- a/pythonFiles/parso/python/grammar34.txt +++ /dev/null @@ -1,134 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed at -# https://docs.python.org/devguide/grammar.html - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: (NEWLINE | stmt)* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ['->' test] ':' suite -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' - ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' - ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' 
NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. It's here for the -# sake of a __future__ import described in PEP 401 -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') -strings: STRING+ -testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | - (test (comp_for | (',' test)* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] - |'**' test) -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. -argument: test [comp_for] | test '=' test # Really [keyword '='] test -comp_iter: comp_for | comp_if -comp_for: 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' test_nocond [comp_iter] - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [yield_arg] -yield_arg: 'from' test | testlist diff --git a/pythonFiles/parso/python/grammar35.txt b/pythonFiles/parso/python/grammar35.txt deleted file mode 100644 index 5868b8f7031a..000000000000 --- a/pythonFiles/parso/python/grammar35.txt +++ /dev/null @@ -1,153 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). 
If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed at -# https://docs.python.org/devguide/grammar.html - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: (NEWLINE | stmt)* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef | async_funcdef) - -# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens -# skipping python3.5 compatibility, in favour of 3.7 solution -async_funcdef: 'async' funcdef -funcdef: 'def' NAME parameters ['->' test] ':' suite - -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' - ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' - ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' 
NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt -async_stmt: 'async' (funcdef | with_stmt | for_stmt) -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. It's here for the -# sake of a __future__ import described in PEP 401 (which really works :-) -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'@'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom_expr ['**' factor] -atom_expr: ['await'] atom trailer* -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') -strings: STRING+ -testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( ((test ':' test | '**' expr) - (comp_for | (',' (test ':' test | '**' expr))* [','])) | - ((test | star_expr) - (comp_for | (',' (test | star_expr))* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: argument (',' argument)* [','] - -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. -# "test '=' test" is really "keyword '=' test", but we have no such token. -# These need to be in a single rule to avoid grammar that is ambiguous -# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, -# we explicitly match '*' here, too, to give it proper precedence. -# Illegal combinations and orderings are blocked in ast.c: -# multiple (test comp_for) arguments are blocked; keyword unpackings -# that precede iterable unpackings are blocked; etc. 
-argument: ( test [comp_for] | - test '=' test | - '**' test | - '*' test ) - -comp_iter: comp_for | comp_if -comp_for: 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' test_nocond [comp_iter] - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [yield_arg] -yield_arg: 'from' test | testlist diff --git a/pythonFiles/parso/python/grammar36.txt b/pythonFiles/parso/python/grammar36.txt deleted file mode 100644 index b82c1fec1145..000000000000 --- a/pythonFiles/parso/python/grammar36.txt +++ /dev/null @@ -1,157 +0,0 @@ -# Grammar for Python - -# NOTE WELL: You should also follow all the steps listed at -# https://docs.python.org/devguide/grammar.html - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: (NEWLINE | stmt)* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef | async_funcdef) - -# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of -# skipping python3.5+ compatibility, in favour of 3.7 solution -async_funcdef: 'async' funcdef -funcdef: 'def' NAME parameters ['->' test] ':' suite - -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ - '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] - | '**' tfpdef [',']]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] - | '**' tfpdef [',']) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ - '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] - | '**' vfpdef [',']]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] - | '**' vfpdef [','] -) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -annassign: ':' test ['=' test] -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal and annotated assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' 
NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt -async_stmt: 'async' (funcdef | with_stmt | for_stmt) -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. It's here for the -# sake of a __future__ import described in PEP 401 (which really works :-) -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'@'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom_expr ['**' factor] -atom_expr: ['await'] atom trailer* -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') -testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( ((test ':' test | '**' expr) - (comp_for | (',' (test ':' test | '**' expr))* [','])) | - ((test | star_expr) - (comp_for | (',' (test | star_expr))* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: argument (',' argument)* [','] - -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. -# "test '=' test" is really "keyword '=' test", but we have no such token. -# These need to be in a single rule to avoid grammar that is ambiguous -# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, -# we explicitly match '*' here, too, to give it proper precedence. -# Illegal combinations and orderings are blocked in ast.c: -# multiple (test comp_for) arguments are blocked; keyword unpackings -# that precede iterable unpackings are blocked; etc. 
-argument: ( test [comp_for] | - test '=' test | - '**' test | - '*' test ) - -comp_iter: comp_for | comp_if -comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' test_nocond [comp_iter] - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [yield_arg] -yield_arg: 'from' test | testlist - -strings: (STRING | fstring)+ -fstring: FSTRING_START fstring_content* FSTRING_END -fstring_content: FSTRING_STRING | fstring_expr -fstring_conversion: '!' NAME -fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}' -fstring_format_spec: ':' fstring_content* diff --git a/pythonFiles/parso/python/grammar37.txt b/pythonFiles/parso/python/grammar37.txt deleted file mode 100644 index 7d112f79852b..000000000000 --- a/pythonFiles/parso/python/grammar37.txt +++ /dev/null @@ -1,157 +0,0 @@ -# Grammar for Python - -# NOTE WELL: You should also follow all the steps listed at -# https://docs.python.org/devguide/grammar.html - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: (NEWLINE | stmt)* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef | async_funcdef) - -# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of -# skipping python3.5+ compatibility, in favour of 3.7 solution -async_funcdef: 'async' funcdef -funcdef: 'def' NAME parameters ['->' test] ':' suite - -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ - '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] - | '**' tfpdef [',']]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] - | '**' tfpdef [',']) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ - '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] - | '**' vfpdef [',']]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] - | '**' vfpdef [','] -) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -annassign: ':' test ['=' test] -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal and annotated assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' 
| '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt -async_stmt: 'async' (funcdef | with_stmt | for_stmt) -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. It's here for the -# sake of a __future__ import described in PEP 401 (which really works :-) -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'@'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom_expr ['**' factor] -atom_expr: ['await'] atom trailer* -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') -testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( ((test ':' test | '**' expr) - (comp_for | (',' (test ':' test | '**' expr))* [','])) | - ((test | star_expr) - (comp_for | (',' (test | star_expr))* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: argument (',' argument)* [','] - -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. -# "test '=' test" is really "keyword '=' test", but we have no such token. -# These need to be in a single rule to avoid grammar that is ambiguous -# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, -# we explicitly match '*' here, too, to give it proper precedence. -# Illegal combinations and orderings are blocked in ast.c: -# multiple (test comp_for) arguments are blocked; keyword unpackings -# that precede iterable unpackings are blocked; etc. 
-argument: ( test [comp_for] | - test '=' test | - '**' test | - '*' test ) - -comp_iter: comp_for | comp_if -comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' test_nocond [comp_iter] - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [yield_arg] -yield_arg: 'from' test | testlist - -strings: (STRING | fstring)+ -fstring: FSTRING_START fstring_content* FSTRING_END -fstring_content: FSTRING_STRING | fstring_expr -fstring_conversion: '!' NAME -fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}' -fstring_format_spec: ':' fstring_content* diff --git a/pythonFiles/parso/python/issue_list.txt b/pythonFiles/parso/python/issue_list.txt deleted file mode 100644 index e5e2c9dda764..000000000000 --- a/pythonFiles/parso/python/issue_list.txt +++ /dev/null @@ -1,176 +0,0 @@ -A list of syntax/indentation errors I've encountered in CPython. - -# Python/compile.c - "'continue' not properly in loop" - "'continue' not supported inside 'finally' clause" # Until loop - "default 'except:' must be last" - "from __future__ imports must occur at the beginning of the file" - "'return' outside function" - "'return' with value in async generator" - "'break' outside loop" - "two starred expressions in assignment" - "asynchronous comprehension outside of an asynchronous function" - "'yield' outside function" # For both yield and yield from - "'yield from' inside async function" - "'await' outside function" - "'await' outside async function" - "starred assignment target must be in a list or tuple" - "can't use starred expression here" - "too many statically nested blocks" # Max. 20 - # This is one of the few places in the cpython code base that I really - # don't understand. It feels a bit hacky if you look at the implementation - # of UNPACK_EX. - "too many expressions in star-unpacking assignment" - - # Just ignore this one, newer versions will not be affected anymore and - # it's a limit of 2^16 - 1. - "too many annotations" # Only python 3.0 - 3.5, 3.6 is not affected. 
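As an illustrative aside (not part of the deleted sources above): the compile.c messages listed in this section are the kind of issues parso re-implements and surfaces through its public error API. A minimal sketch, assuming parso is installed and that the chosen message ("'return' outside function") is among those parso reproduces; the version string and sample code are placeholders:

    import parso

    # Load a grammar for a concrete Python version and parse code containing a
    # module-level 'return', one of the compile.c cases listed above.
    grammar = parso.load_grammar(version='3.6')
    module = grammar.parse('def f():\n    pass\nreturn 1\n')
    for issue in grammar.iter_errors(module):
        # Each issue carries a position and a message such as
        # "SyntaxError: 'return' outside function".
        print(issue.start_pos, issue.message)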
- -# Python/ast.c - # used with_item exprlist expr_stmt - "can't %s %s" % ("assign to" or "delete", - "lambda" - "function call" # foo() - "generator expression" - "list comprehension" - "set comprehension" - "dict comprehension" - "keyword" - "Ellipsis" - "comparison" - Dict: Set: Num: Str: Bytes: JoinedStr: FormattedValue: - "literal" - BoolOp: BinOp: UnaryOp: - "operator" - Yield: YieldFrom: - "yield expression" - Await: - "await expression" - IfExp: - "conditional expression" - "assignment to keyword" # (keywords + __debug__) # None = 2 - "named arguments must follow bare *" # def foo(*): pass - "non-default argument follows default argument" # def f(x=3, y): pass - "iterable unpacking cannot be used in comprehension" # [*[] for a in [1]] - "dict unpacking cannot be used in dict comprehension" # {**{} for a in [1]} - "Generator expression must be parenthesized if not sole argument" # foo(x for x in [], b) - "positional argument follows keyword argument unpacking" # f(**x, y) >= 3.5 - "positional argument follows keyword argument" # f(x=2, y) >= 3.5 - "iterable argument unpacking follows keyword argument unpacking" # foo(**kwargs, *args) - "lambda cannot contain assignment" # f(lambda: 1=1) - "keyword can't be an expression" # f(+x=1) - "keyword argument repeated" # f(x=1, x=2) - "illegal expression for augmented assignment" # x, y += 1 - "only single target (not list) can be annotated" # [x, y]: int - "only single target (not tuple) can be annotated" # x, y: str - "illegal target for annotation" # True: 1` - "trailing comma not allowed without surrounding parentheses" # from foo import a, - "bytes can only contain ASCII literal characters." # b'ä' # prob. only python 3 - "cannot mix bytes and nonbytes literals" # 's' b'' - "assignment to yield expression not possible" # x = yield 1 = 3 - - "f-string: empty expression not allowed" # f'{}' - "f-string: single '}' is not allowed" # f'}' - "f-string: expressions nested too deeply" # f'{1:{5:{3}}}' - "f-string expression part cannot include a backslash" # f'{"\"}' or f'{"\\"}' - "f-string expression part cannot include '#'" # f'{#}' - "f-string: unterminated string" # f'{"}' - "f-string: mismatched '(', '{', or '['" - "f-string: invalid conversion character: expected 's', 'r', or 'a'" # f'{1!b}' - "f-string: unexpected end of string" # Doesn't really happen?! - "f-string: expecting '}'" # f'{' - "(unicode error) unknown error - "(value error) unknown error - "(unicode error) MESSAGE - MESSAGES = { - "\\ at end of string" - "truncated \\xXX escape" - "truncated \\uXXXX escape" - "truncated \\UXXXXXXXX escape" - "illegal Unicode character" # '\Uffffffff' - "malformed \\N character escape" # '\N{}' - "unknown Unicode character name" # '\N{foo}' - } - "(value error) MESSAGE # bytes - MESSAGES = { - "Trailing \\ in string" - "invalid \\x escape at position %d" - } - - "invalid escape sequence \\%c" # Only happens when used in `python -W error` - "unexpected node" # Probably irrelevant - "Unexpected node-type in from-import" # Irrelevant, doesn't happen. - "malformed 'try' statement" # Irrelevant, doesn't happen. 
- -# Python/symtable.c - "duplicate argument '%U' in function definition" - "name '%U' is assigned to before global declaration" - "name '%U' is assigned to before nonlocal declaration" - "name '%U' is used prior to global declaration" - "name '%U' is used prior to nonlocal declaration" - "annotated name '%U' can't be global" - "annotated name '%U' can't be nonlocal" - "import * only allowed at module level" - - "name '%U' is parameter and global", - "name '%U' is nonlocal and global", - "name '%U' is parameter and nonlocal", - - "nonlocal declaration not allowed at module level"); - "no binding for nonlocal '%U' found", - # RecursionError. Not handled. For all human written code, this is probably - # not an issue. eval("()"*x) with x>=2998 for example fails, but that's - # more than 2000 executions on one line. - "maximum recursion depth exceeded during compilation"); - -# Python/future.c - "not a chance" - "future feature %.100s is not defined" - "from __future__ imports must occur at the beginning of the file" # Also in compile.c - -# Parser/tokenizer.c - # All the following issues seem to be irrelevant for parso, because the - # encoding stuff is done before it reaches the tokenizer. It's already - # unicode at that point. - "encoding problem: %s" - "encoding problem: %s with BOM" - "Non-UTF-8 code starting with '\\x%.2x' in file %U on line %i, but no encoding declared; see http://python.org/dev/peps/pep-0263/ for details" - -# Parser/pythonrun.c - E_SYNTAX: "invalid syntax" - E_LINECONT: "unexpected character after line continuation character" - E_IDENTIFIER: "invalid character in identifier" - # Also just use 'invalid syntax'. Happens mostly with stuff like `(`. This - # message doesn't really help the user, because it only appears very - # randomly, e.g. `(or` wouldn't yield this error. - E_EOF: "unexpected EOF while parsing" - # Even in 3.6 this is implemented kind of shaky. Not implemented, I think - # cPython needs to fix this one first. - # e.g. `ast.parse('def x():\n\t if 1:\n \t \tpass')` works :/ - E_TABSPACE: "inconsistent use of tabs and spaces in indentation" - # Ignored, just shown as "invalid syntax". The error has mostly to do with - # numbers like 0b2 everywhere or 1.6_ in Python3.6. - E_TOKEN: "invalid token" - E_EOFS: "EOF while scanning triple-quoted string literal" - E_EOLS: "EOL while scanning string literal" - - # IndentationError - E_DEDENT: "unindent does not match any outer indentation level" - E_TOODEEP: "too many levels of indentation" # 100 levels - E_SYNTAX: "expected an indented block" - "unexpected indent" - # I don't think this actually ever happens. - "unexpected unindent" - - - # Irrelevant for parso for now. 
- E_OVERFLOW: "expression too long" - E_DECODE: "unknown decode error" - E_BADSINGLE: "multiple statements found while compiling a single statement" - - -Version specific: -Python 3.5: - 'yield' inside async function -Python 3.3/3.4: - can use starred expression only as assignment target diff --git a/pythonFiles/parso/python/parser.py b/pythonFiles/parso/python/parser.py deleted file mode 100644 index 7cdf987ab365..000000000000 --- a/pythonFiles/parso/python/parser.py +++ /dev/null @@ -1,265 +0,0 @@ -from parso.python import tree -from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER, - STRING, tok_name, NAME, FSTRING_STRING, - FSTRING_START, FSTRING_END) -from parso.parser import BaseParser -from parso.pgen2.parse import token_to_ilabel - - -class Parser(BaseParser): - """ - This class is used to parse a Python file, it then divides them into a - class structure of different scopes. - - :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar. - """ - - node_map = { - 'expr_stmt': tree.ExprStmt, - 'classdef': tree.Class, - 'funcdef': tree.Function, - 'file_input': tree.Module, - 'import_name': tree.ImportName, - 'import_from': tree.ImportFrom, - 'break_stmt': tree.KeywordStatement, - 'continue_stmt': tree.KeywordStatement, - 'return_stmt': tree.ReturnStmt, - 'raise_stmt': tree.KeywordStatement, - 'yield_expr': tree.YieldExpr, - 'del_stmt': tree.KeywordStatement, - 'pass_stmt': tree.KeywordStatement, - 'global_stmt': tree.GlobalStmt, - 'nonlocal_stmt': tree.KeywordStatement, - 'print_stmt': tree.KeywordStatement, - 'assert_stmt': tree.AssertStmt, - 'if_stmt': tree.IfStmt, - 'with_stmt': tree.WithStmt, - 'for_stmt': tree.ForStmt, - 'while_stmt': tree.WhileStmt, - 'try_stmt': tree.TryStmt, - 'comp_for': tree.CompFor, - # Not sure if this is the best idea, but IMO it's the easiest way to - # avoid extreme amounts of work around the subtle difference of 2/3 - # grammar in list comoprehensions. - 'list_for': tree.CompFor, - # Same here. This just exists in Python 2.6. - 'gen_for': tree.CompFor, - 'decorator': tree.Decorator, - 'lambdef': tree.Lambda, - 'old_lambdef': tree.Lambda, - 'lambdef_nocond': tree.Lambda, - } - default_node = tree.PythonNode - - # Names/Keywords are handled separately - _leaf_map = { - STRING: tree.String, - NUMBER: tree.Number, - NEWLINE: tree.Newline, - ENDMARKER: tree.EndMarker, - FSTRING_STRING: tree.FStringString, - FSTRING_START: tree.FStringStart, - FSTRING_END: tree.FStringEnd, - } - - def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'): - super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery) - - self.syntax_errors = [] - self._omit_dedent_list = [] - self._indent_counter = 0 - - # TODO do print absolute import detection here. - # try: - # del python_grammar_no_print_statement.keywords["print"] - # except KeyError: - # pass # Doesn't exist in the Python 3 grammar. - - # if self.options["print_function"]: - # python_grammar = pygram.python_grammar_no_print_statement - # else: - - def parse(self, tokens): - if self._error_recovery: - if self._start_symbol != 'file_input': - raise NotImplementedError - - tokens = self._recovery_tokenize(tokens) - - node = super(Parser, self).parse(tokens) - - if self._start_symbol == 'file_input' != node.type: - # If there's only one statement, we get back a non-module. 
That's - # not what we want, we want a module, so we add it here: - node = self.convert_node( - self._pgen_grammar, - self._pgen_grammar.symbol2number['file_input'], - [node] - ) - - return node - - def convert_node(self, pgen_grammar, type, children): - """ - Convert raw node information to a PythonBaseNode instance. - - This is passed to the parser driver which calls it whenever a reduction of a - grammar rule produces a new complete node, so that the tree is build - strictly bottom-up. - """ - # TODO REMOVE symbol, we don't want type here. - symbol = pgen_grammar.number2symbol[type] - try: - return self.node_map[symbol](children) - except KeyError: - if symbol == 'suite': - # We don't want the INDENT/DEDENT in our parser tree. Those - # leaves are just cancer. They are virtual leaves and not real - # ones and therefore have pseudo start/end positions and no - # prefixes. Just ignore them. - children = [children[0]] + children[2:-1] - elif symbol == 'list_if': - # Make transitioning from 2 to 3 easier. - symbol = 'comp_if' - elif symbol == 'listmaker': - # Same as list_if above. - symbol = 'testlist_comp' - return self.default_node(symbol, children) - - def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos): - # print('leaf', repr(value), token.tok_name[type]) - if type == NAME: - if value in pgen_grammar.keywords: - return tree.Keyword(value, start_pos, prefix) - else: - return tree.Name(value, start_pos, prefix) - - return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix) - - def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix, - add_token_callback): - def get_symbol_and_nodes(stack): - for dfa, state, (type_, nodes) in stack: - symbol = pgen_grammar.number2symbol[type_] - yield symbol, nodes - - tos_nodes = stack.get_tos_nodes() - if tos_nodes: - last_leaf = tos_nodes[-1].get_last_leaf() - else: - last_leaf = None - - if self._start_symbol == 'file_input' and \ - (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value): - def reduce_stack(states, newstate): - # reduce - state = newstate - while states[state] == [(0, state)]: - self.pgen_parser._pop() - - dfa, state, (type_, nodes) = stack[-1] - states, first = dfa - - - # In Python statements need to end with a newline. But since it's - # possible (and valid in Python ) that there's no newline at the - # end of a file, we have to recover even if the user doesn't want - # error recovery. - #print('x', pprint.pprint(stack)) - ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value) - - dfa, state, (type_, nodes) = stack[-1] - symbol = pgen_grammar.number2symbol[type_] - states, first = dfa - arcs = states[state] - # Look for a state with this label - for i, newstate in arcs: - if ilabel == i: - if symbol == 'simple_stmt': - # This is basically shifting - stack[-1] = (dfa, newstate, (type_, nodes)) - - reduce_stack(states, newstate) - add_token_callback(typ, value, start_pos, prefix) - return - # Check if we're at the right point - #for symbol, nodes in get_symbol_and_nodes(stack): - # self.pgen_parser._pop() - - #break - break - #symbol = pgen_grammar.number2symbol[type_] - - if not self._error_recovery: - return super(Parser, self).error_recovery( - pgen_grammar, stack, arcs, typ, value, start_pos, prefix, - add_token_callback) - - def current_suite(stack): - # For now just discard everything that is not a suite or - # file_input, if we detect an error. 
- for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))): - # `suite` can sometimes be only simple_stmt, not stmt. - if symbol == 'file_input': - break - elif symbol == 'suite' and len(nodes) > 1: - # suites without an indent in them get discarded. - break - return index, symbol, nodes - - index, symbol, nodes = current_suite(stack) - - # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index) - if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos): - add_token_callback(typ, value, start_pos, prefix) - else: - if typ == INDENT: - # For every deleted INDENT we have to delete a DEDENT as well. - # Otherwise the parser will get into trouble and DEDENT too early. - self._omit_dedent_list.append(self._indent_counter) - - error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix) - stack[-1][2][1].append(error_leaf) - - if symbol == 'suite': - dfa, state, node = stack[-1] - states, first = dfa - arcs = states[state] - intended_label = pgen_grammar.symbol2label['stmt'] - # Introduce a proper state transition. We're basically allowing - # there to be no valid statements inside a suite. - if [x[0] for x in arcs] == [intended_label]: - new_state = arcs[0][1] - stack[-1] = dfa, new_state, node - - def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos): - failed_stack = False - found = False - all_nodes = [] - for dfa, state, (type_, nodes) in stack[start_index:]: - if nodes: - found = True - if found: - failed_stack = True - all_nodes += nodes - if failed_stack: - stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes)) - - stack[start_index:] = [] - return failed_stack - - def _recovery_tokenize(self, tokens): - for typ, value, start_pos, prefix in tokens: - # print(tok_name[typ], repr(value), start_pos, repr(prefix)) - if typ == DEDENT: - # We need to count indents, because if we just omit any DEDENT, - # we might omit them in the wrong place. 
- o = self._omit_dedent_list - if o and o[-1] == self._indent_counter: - o.pop() - continue - - self._indent_counter -= 1 - elif typ == INDENT: - self._indent_counter += 1 - yield typ, value, start_pos, prefix diff --git a/pythonFiles/parso/python/pep8.py b/pythonFiles/parso/python/pep8.py deleted file mode 100644 index 59fe452d06c4..000000000000 --- a/pythonFiles/parso/python/pep8.py +++ /dev/null @@ -1,727 +0,0 @@ -import re -from contextlib import contextmanager - -from parso.python.errors import ErrorFinder, ErrorFinderConfig -from parso.normalizer import Rule -from parso.python.tree import search_ancestor, Flow, Scope - - -_IMPORT_TYPES = ('import_name', 'import_from') -_SUITE_INTRODUCERS = ('classdef', 'funcdef', 'if_stmt', 'while_stmt', - 'for_stmt', 'try_stmt', 'with_stmt') -_NON_STAR_TYPES = ('term', 'import_from', 'power') -_OPENING_BRACKETS = '(', '[', '{' -_CLOSING_BRACKETS = ')', ']', '}' -_FACTOR = '+', '-', '~' -_ALLOW_SPACE = '*', '+', '-', '**', '/', '//', '@' -_BITWISE_OPERATOR = '<<', '>>', '|', '&', '^' -_NEEDS_SPACE = ('=', '%', '->', - '<', '>', '==', '>=', '<=', '<>', '!=', - '+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=', - '>>=', '**=', '//=') -_NEEDS_SPACE += _BITWISE_OPERATOR -_IMPLICIT_INDENTATION_TYPES = ('dictorsetmaker', 'argument') -_POSSIBLE_SLICE_PARENTS = ('subscript', 'subscriptlist', 'sliceop') - - -class IndentationTypes(object): - VERTICAL_BRACKET = object() - HANGING_BRACKET = object() - BACKSLASH = object() - SUITE = object() - IMPLICIT = object() - - -class IndentationNode(object): - type = IndentationTypes.SUITE - - def __init__(self, config, indentation, parent=None): - self.bracket_indentation = self.indentation = indentation - self.parent = parent - - def __repr__(self): - return '<%s>' % self.__class__.__name__ - - def get_latest_suite_node(self): - n = self - while n is not None: - if n.type == IndentationTypes.SUITE: - return n - - n = n.parent - - -class BracketNode(IndentationNode): - def __init__(self, config, leaf, parent, in_suite_introducer=False): - self.leaf = leaf - - # Figure out here what the indentation is. For chained brackets - # we can basically use the previous indentation. - previous_leaf = leaf - n = parent - if n.type == IndentationTypes.IMPLICIT: - n = n.parent - while True: - if hasattr(n, 'leaf') and previous_leaf.line != n.leaf.line: - break - - previous_leaf = previous_leaf.get_previous_leaf() - if not isinstance(n, BracketNode) or previous_leaf != n.leaf: - break - n = n.parent - parent_indentation = n.indentation - - - next_leaf = leaf.get_next_leaf() - if '\n' in next_leaf.prefix: - # This implies code like: - # foobarbaz( - # a, - # b, - # ) - self.bracket_indentation = parent_indentation \ - + config.closing_bracket_hanging_indentation - self.indentation = parent_indentation + config.indentation - self.type = IndentationTypes.HANGING_BRACKET - else: - # Implies code like: - # foobarbaz( - # a, - # b, - # ) - expected_end_indent = leaf.end_pos[1] - if '\t' in config.indentation: - self.indentation = None - else: - self.indentation = ' ' * expected_end_indent - self.bracket_indentation = self.indentation - self.type = IndentationTypes.VERTICAL_BRACKET - - if in_suite_introducer and parent.type == IndentationTypes.SUITE \ - and self.indentation == parent_indentation + config.indentation: - self.indentation += config.indentation - # The closing bracket should have the same indentation. 
- self.bracket_indentation = self.indentation - self.parent = parent - - -class ImplicitNode(BracketNode): - """ - Implicit indentation after keyword arguments, default arguments, - annotations and dict values. - """ - def __init__(self, config, leaf, parent): - super(ImplicitNode, self).__init__(config, leaf, parent) - self.type = IndentationTypes.IMPLICIT - - next_leaf = leaf.get_next_leaf() - if leaf == ':' and '\n' not in next_leaf.prefix: - self.indentation += ' ' - - -class BackslashNode(IndentationNode): - type = IndentationTypes.BACKSLASH - - def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None): - expr_stmt = search_ancestor(containing_leaf, 'expr_stmt') - if expr_stmt is not None: - equals = expr_stmt.children[-2] - - if '\t' in config.indentation: - # TODO unite with the code of BracketNode - self.indentation = None - else: - # If the backslash follows the equals, use normal indentation - # otherwise it should align with the equals. - if equals.end_pos == spacing.start_pos: - self.indentation = parent_indentation + config.indentation - else: - # +1 because there is a space. - self.indentation = ' ' * (equals.end_pos[1] + 1) - else: - self.indentation = parent_indentation + config.indentation - self.bracket_indentation = self.indentation - self.parent = parent - - -def _is_magic_name(name): - return name.value.startswith('__') and name.value.endswith('__') - - -class PEP8Normalizer(ErrorFinder): - def __init__(self, *args, **kwargs): - super(PEP8Normalizer, self).__init__(*args, **kwargs) - self._previous_part = None - self._previous_leaf = None - self._on_newline = True - self._newline_count = 0 - self._wanted_newline_count = None - self._max_new_lines_in_prefix = 0 - self._new_statement = True - self._implicit_indentation_possible = False - # The top of stack of the indentation nodes. - self._indentation_tos = self._last_indentation_tos = \ - IndentationNode(self._config, indentation='') - self._in_suite_introducer = False - - if ' ' in self._config.indentation: - self._indentation_type = 'spaces' - self._wrong_indentation_char = '\t' - else: - self._indentation_type = 'tabs' - self._wrong_indentation_char = ' ' - - @contextmanager - def visit_node(self, node): - with super(PEP8Normalizer, self).visit_node(node): - with self._visit_node(node): - yield - - @contextmanager - def _visit_node(self, node): - typ = node.type - - if typ in 'import_name': - names = node.get_defined_names() - if len(names) > 1: - for name in names[:1]: - self.add_issue(name, 401, 'Multiple imports on one line') - elif typ == 'lambdef': - expr_stmt = node.parent - # Check if it's simply defining a single name, not something like - # foo.bar or x[1], where using a lambda could make more sense. - if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' for n in expr_stmt.children[:-2:2]): - self.add_issue(node, 731, 'Do not assign a lambda expression, use a def') - elif typ == 'try_stmt': - for child in node.children: - # Here we can simply check if it's an except, because otherwise - # it would be an except_clause. 
- if child.type == 'keyword' and child.value == 'except': - self.add_issue(child, 722, 'Do not use bare except, specify exception instead') - elif typ == 'comparison': - for child in node.children: - if child.type not in ('atom_expr', 'power'): - continue - if len(child.children) > 2: - continue - trailer = child.children[1] - atom = child.children[0] - if trailer.type == 'trailer' and atom.type == 'name' \ - and atom.value == 'type': - self.add_issue(node, 721, "Do not compare types, use 'isinstance()") - break - elif typ == 'file_input': - endmarker = node.children[-1] - prev = endmarker.get_previous_leaf() - prefix = endmarker.prefix - if (not prefix.endswith('\n') and ( - prefix or prev is None or prev.value != '\n')): - self.add_issue(endmarker, 292, "No newline at end of file") - - if typ in _IMPORT_TYPES: - simple_stmt = node.parent - module = simple_stmt.parent - #if module.type == 'simple_stmt': - if module.type == 'file_input': - index = module.children.index(simple_stmt) - for child in module.children[:index]: - children = [child] - if child.type == 'simple_stmt': - # Remove the newline. - children = child.children[:-1] - - found_docstring = False - for c in children: - if c.type == 'string' and not found_docstring: - continue - found_docstring = True - - if c.type == 'expr_stmt' and \ - all(_is_magic_name(n) for n in c.get_defined_names()): - continue - - if c.type in _IMPORT_TYPES or isinstance(c, Flow): - continue - - self.add_issue(node, 402, 'Module level import not at top of file') - break - else: - continue - break - - implicit_indentation_possible = typ in _IMPLICIT_INDENTATION_TYPES - in_introducer = typ in _SUITE_INTRODUCERS - if in_introducer: - self._in_suite_introducer = True - elif typ == 'suite': - if self._indentation_tos.type == IndentationTypes.BACKSLASH: - self._indentation_tos = self._indentation_tos.parent - - self._indentation_tos = IndentationNode( - self._config, - self._indentation_tos.indentation + self._config.indentation, - parent=self._indentation_tos - ) - elif implicit_indentation_possible: - self._implicit_indentation_possible = True - yield - if typ == 'suite': - assert self._indentation_tos.type == IndentationTypes.SUITE - self._indentation_tos = self._indentation_tos.parent - # If we dedent, no lines are needed anymore. - self._wanted_newline_count = None - elif implicit_indentation_possible: - self._implicit_indentation_possible = False - if self._indentation_tos.type == IndentationTypes.IMPLICIT: - self._indentation_tos = self._indentation_tos.parent - elif in_introducer: - self._in_suite_introducer = False - if typ in ('classdef', 'funcdef'): - self._wanted_newline_count = self._get_wanted_blank_lines_count() - - def _check_tabs_spaces(self, spacing): - if self._wrong_indentation_char in spacing.value: - self.add_issue(spacing, 101, 'Indentation contains ' + self._indentation_type) - return True - return False - - def _get_wanted_blank_lines_count(self): - suite_node = self._indentation_tos.get_latest_suite_node() - return int(suite_node.parent is None) + 1 - - def _reset_newlines(self, spacing, leaf, is_comment=False): - self._max_new_lines_in_prefix = \ - max(self._max_new_lines_in_prefix, self._newline_count) - - wanted = self._wanted_newline_count - if wanted is not None: - # Need to substract one - blank_lines = self._newline_count - 1 - if wanted > blank_lines and leaf.type != 'endmarker': - # In case of a comment we don't need to add the issue, yet. - if not is_comment: - # TODO end_pos wrong. 
- code = 302 if wanted == 2 else 301 - message = "expected %s blank line, found %s" \ - % (wanted, blank_lines) - self.add_issue(spacing, code, message) - self._wanted_newline_count = None - else: - self._wanted_newline_count = None - - if not is_comment: - wanted = self._get_wanted_blank_lines_count() - actual = self._max_new_lines_in_prefix - 1 - - val = leaf.value - needs_lines = ( - val == '@' and leaf.parent.type == 'decorator' - or ( - val == 'class' - or val == 'async' and leaf.get_next_leaf() == 'def' - or val == 'def' and self._previous_leaf != 'async' - ) and leaf.parent.parent.type != 'decorated' - ) - if needs_lines and actual < wanted: - func_or_cls = leaf.parent - suite = func_or_cls.parent - if suite.type == 'decorated': - suite = suite.parent - - # The first leaf of a file or a suite should not need blank - # lines. - if suite.children[int(suite.type == 'suite')] != func_or_cls: - code = 302 if wanted == 2 else 301 - message = "expected %s blank line, found %s" \ - % (wanted, actual) - self.add_issue(spacing, code, message) - - self._max_new_lines_in_prefix = 0 - - self._newline_count = 0 - - def visit_leaf(self, leaf): - super(PEP8Normalizer, self).visit_leaf(leaf) - for part in leaf._split_prefix(): - if part.type == 'spacing': - # This part is used for the part call after for. - break - self._visit_part(part, part.create_spacing_part(), leaf) - - self._analyse_non_prefix(leaf) - self._visit_part(leaf, part, leaf) - - # Cleanup - self._last_indentation_tos = self._indentation_tos - - self._new_statement = leaf.type == 'newline' - - # TODO does this work? with brackets and stuff? - if leaf.type == 'newline' and \ - self._indentation_tos.type == IndentationTypes.BACKSLASH: - self._indentation_tos = self._indentation_tos.parent - - if leaf.value == ':' and leaf.parent.type in _SUITE_INTRODUCERS: - self._in_suite_introducer = False - elif leaf.value == 'elif': - self._in_suite_introducer = True - - if not self._new_statement: - self._reset_newlines(part, leaf) - self._max_blank_lines = 0 - - self._previous_leaf = leaf - - return leaf.value - - def _visit_part(self, part, spacing, leaf): - value = part.value - type_ = part.type - if type_ == 'error_leaf': - return - - if value == ',' and part.parent.type == 'dictorsetmaker': - self._indentation_tos = self._indentation_tos.parent - - node = self._indentation_tos - - if type_ == 'comment': - if value.startswith('##'): - # Whole blocks of # should not raise an error. - if value.lstrip('#'): - self.add_issue(part, 266, "Too many leading '#' for block comment.") - elif self._on_newline: - if not re.match('#:? ', value) and not value == '#' \ - and not (value.startswith('#!') and part.start_pos == (1, 0)): - self.add_issue(part, 265, "Block comment should start with '# '") - else: - if not re.match('#:? [^ ]', value): - self.add_issue(part, 262, "Inline comment should start with '# '") - - self._reset_newlines(spacing, leaf, is_comment=True) - elif type_ == 'newline': - if self._newline_count > self._get_wanted_blank_lines_count(): - self.add_issue(part, 303, "Too many blank lines (%s)" % self._newline_count) - elif leaf in ('def', 'class') \ - and leaf.parent.parent.type == 'decorated': - self.add_issue(part, 304, "Blank lines found after function decorator") - - - self._newline_count += 1 - - if type_ == 'backslash': - # TODO is this enough checking? What about ==? 
- if node.type != IndentationTypes.BACKSLASH: - if node.type != IndentationTypes.SUITE: - self.add_issue(part, 502, 'The backslash is redundant between brackets') - else: - indentation = node.indentation - if self._in_suite_introducer and node.type == IndentationTypes.SUITE: - indentation += self._config.indentation - - self._indentation_tos = BackslashNode( - self._config, - indentation, - part, - spacing, - parent=self._indentation_tos - ) - elif self._on_newline: - indentation = spacing.value - if node.type == IndentationTypes.BACKSLASH \ - and self._previous_part.type == 'newline': - self._indentation_tos = self._indentation_tos.parent - - if not self._check_tabs_spaces(spacing): - should_be_indentation = node.indentation - if type_ == 'comment': - # Comments can be dedented. So we have to care for that. - n = self._last_indentation_tos - while True: - if len(indentation) > len(n.indentation): - break - - should_be_indentation = n.indentation - - self._last_indentation_tos = n - if n == node: - break - n = n.parent - - if self._new_statement: - if type_ == 'newline': - if indentation: - self.add_issue(spacing, 291, 'Trailing whitespace') - elif indentation != should_be_indentation: - s = '%s %s' % (len(self._config.indentation), self._indentation_type) - self.add_issue(part, 111, 'Indentation is not a multiple of ' + s) - else: - if value in '])}': - should_be_indentation = node.bracket_indentation - else: - should_be_indentation = node.indentation - if self._in_suite_introducer and indentation == \ - node.get_latest_suite_node().indentation \ - + self._config.indentation: - self.add_issue(part, 129, "Line with same indent as next logical block") - elif indentation != should_be_indentation: - if not self._check_tabs_spaces(spacing) and part.value != '\n': - if value in '])}': - if node.type == IndentationTypes.VERTICAL_BRACKET: - self.add_issue(part, 124, "Closing bracket does not match visual indentation") - else: - self.add_issue(part, 123, "Losing bracket does not match indentation of opening bracket's line") - else: - if len(indentation) < len(should_be_indentation): - if node.type == IndentationTypes.VERTICAL_BRACKET: - self.add_issue(part, 128, 'Continuation line under-indented for visual indent') - elif node.type == IndentationTypes.BACKSLASH: - self.add_issue(part, 122, 'Continuation line missing indentation or outdented') - elif node.type == IndentationTypes.IMPLICIT: - self.add_issue(part, 135, 'xxx') - else: - self.add_issue(part, 121, 'Continuation line under-indented for hanging indent') - else: - if node.type == IndentationTypes.VERTICAL_BRACKET: - self.add_issue(part, 127, 'Continuation line over-indented for visual indent') - elif node.type == IndentationTypes.IMPLICIT: - self.add_issue(part, 136, 'xxx') - else: - self.add_issue(part, 126, 'Continuation line over-indented for hanging indent') - else: - self._check_spacing(part, spacing) - - self._check_line_length(part, spacing) - # ------------------------------- - # Finalizing. Updating the state. 
- # ------------------------------- - if value and value in '()[]{}' and type_ != 'error_leaf' \ - and part.parent.type != 'error_node': - if value in _OPENING_BRACKETS: - self._indentation_tos = BracketNode( - self._config, part, - parent=self._indentation_tos, - in_suite_introducer=self._in_suite_introducer - ) - else: - assert node.type != IndentationTypes.IMPLICIT - self._indentation_tos = self._indentation_tos.parent - elif value in ('=', ':') and self._implicit_indentation_possible \ - and part.parent.type in _IMPLICIT_INDENTATION_TYPES: - indentation = node.indentation - self._indentation_tos = ImplicitNode( - self._config, part, parent=self._indentation_tos - ) - - self._on_newline = type_ in ('newline', 'backslash', 'bom') - - self._previous_part = part - self._previous_spacing = spacing - - def _check_line_length(self, part, spacing): - if part.type == 'backslash': - last_column = part.start_pos[1] + 1 - else: - last_column = part.end_pos[1] - if last_column > self._config.max_characters \ - and spacing.start_pos[1] <= self._config.max_characters : - # Special case for long URLs in multi-line docstrings or comments, - # but still report the error when the 72 first chars are whitespaces. - report = True - if part.type == 'comment': - splitted = part.value[1:].split() - if len(splitted) == 1 \ - and (part.end_pos[1] - len(splitted[0])) < 72: - report = False - if report: - self.add_issue( - part, - 501, - 'Line too long (%s > %s characters)' % - (last_column, self._config.max_characters), - ) - - def _check_spacing(self, part, spacing): - def add_if_spaces(*args): - if spaces: - return self.add_issue(*args) - - def add_not_spaces(*args): - if not spaces: - return self.add_issue(*args) - - spaces = spacing.value - prev = self._previous_part - if prev is not None and prev.type == 'error_leaf' or part.type == 'error_leaf': - return - - type_ = part.type - if '\t' in spaces: - self.add_issue(spacing, 223, 'Used tab to separate tokens') - elif type_ == 'comment': - if len(spaces) < self._config.spaces_before_comment: - self.add_issue(spacing, 261, 'At least two spaces before inline comment') - elif type_ == 'newline': - add_if_spaces(spacing, 291, 'Trailing whitespace') - elif len(spaces) > 1: - self.add_issue(spacing, 221, 'Multiple spaces used') - else: - if prev in _OPENING_BRACKETS: - message = "Whitespace after '%s'" % part.value - add_if_spaces(spacing, 201, message) - elif part in _CLOSING_BRACKETS: - message = "Whitespace before '%s'" % part.value - add_if_spaces(spacing, 202, message) - elif part in (',', ';') or part == ':' \ - and part.parent.type not in _POSSIBLE_SLICE_PARENTS: - message = "Whitespace before '%s'" % part.value - add_if_spaces(spacing, 203, message) - elif prev == ':' and prev.parent.type in _POSSIBLE_SLICE_PARENTS: - pass # TODO - elif prev in (',', ';', ':'): - add_not_spaces(spacing, 231, "missing whitespace after '%s'") - elif part == ':': # Is a subscript - # TODO - pass - elif part in ('*', '**') and part.parent.type not in _NON_STAR_TYPES \ - or prev in ('*', '**') \ - and prev.parent.type not in _NON_STAR_TYPES: - # TODO - pass - elif prev in _FACTOR and prev.parent.type == 'factor': - pass - elif prev == '@' and prev.parent.type == 'decorator': - pass # TODO should probably raise an error if there's a space here - elif part in _NEEDS_SPACE or prev in _NEEDS_SPACE: - if part == '=' and part.parent.type in ('argument', 'param') \ - or prev == '=' and prev.parent.type in ('argument', 'param'): - if part == '=': - param = part.parent - else: - param = 
prev.parent - if param.type == 'param' and param.annotation: - add_not_spaces(spacing, 252, 'Expected spaces around annotation equals') - else: - add_if_spaces(spacing, 251, 'Unexpected spaces around keyword / parameter equals') - elif part in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR: - add_not_spaces(spacing, 227, 'Missing whitespace around bitwise or shift operator') - elif part == '%' or prev == '%': - add_not_spaces(spacing, 228, 'Missing whitespace around modulo operator') - else: - message_225 = 'Missing whitespace between tokens' - add_not_spaces(spacing, 225, message_225) - elif type_ == 'keyword' or prev.type == 'keyword': - add_not_spaces(spacing, 275, 'Missing whitespace around keyword') - else: - prev_spacing = self._previous_spacing - if prev in _ALLOW_SPACE and spaces != prev_spacing.value \ - and '\n' not in self._previous_leaf.prefix: - message = "Whitespace before operator doesn't match with whitespace after" - self.add_issue(spacing, 229, message) - - if spaces and part not in _ALLOW_SPACE and prev not in _ALLOW_SPACE: - message_225 = 'Missing whitespace between tokens' - #print('xy', spacing) - #self.add_issue(spacing, 225, message_225) - # TODO why only brackets? - if part in _OPENING_BRACKETS: - message = "Whitespace before '%s'" % part.value - add_if_spaces(spacing, 211, message) - - def _analyse_non_prefix(self, leaf): - typ = leaf.type - if typ == 'name' and leaf.value in ('l', 'O', 'I'): - if leaf.is_definition(): - message = "Do not define %s named 'l', 'O', or 'I' one line" - if leaf.parent.type == 'class' and leaf.parent.name == leaf: - self.add_issue(leaf, 742, message % 'classes') - elif leaf.parent.type == 'function' and leaf.parent.name == leaf: - self.add_issue(leaf, 743, message % 'function') - else: - self.add_issuadd_issue(741, message % 'variables', leaf) - elif leaf.value == ':': - if isinstance(leaf.parent, (Flow, Scope)) and leaf.parent.type != 'lambdef': - next_leaf = leaf.get_next_leaf() - if next_leaf.type != 'newline': - if leaf.parent.type == 'funcdef': - self.add_issue(next_leaf, 704, 'Multiple statements on one line (def)') - else: - self.add_issue(next_leaf, 701, 'Multiple statements on one line (colon)') - elif leaf.value == ';': - if leaf.get_next_leaf().type in ('newline', 'endmarker'): - self.add_issue(leaf, 703, 'Statement ends with a semicolon') - else: - self.add_issue(leaf, 702, 'Multiple statements on one line (semicolon)') - elif leaf.value in ('==', '!='): - comparison = leaf.parent - index = comparison.children.index(leaf) - left = comparison.children[index - 1] - right = comparison.children[index + 1] - for node in left, right: - if node.type == 'keyword' or node.type == 'name': - if node.value == 'None': - message = "comparison to None should be 'if cond is None:'" - self.add_issue(leaf, 711, message) - break - elif node.value in ('True', 'False'): - message = "comparison to False/True should be 'if cond is True:' or 'if cond:'" - self.add_issue(leaf, 712, message) - break - elif leaf.value in ('in', 'is'): - comparison = leaf.parent - if comparison.type == 'comparison' and comparison.parent.type == 'not_test': - if leaf.value == 'in': - self.add_issue(leaf, 713, "test for membership should be 'not in'") - else: - self.add_issue(leaf, 714, "test for object identity should be 'is not'") - elif typ == 'string': - # Checking multiline strings - for i, line in enumerate(leaf.value.splitlines()[1:]): - indentation = re.match('[ \t]*', line).group(0) - start_pos = leaf.line + i, len(indentation) - # TODO check multiline 
indentation. - elif typ == 'endmarker': - if self._newline_count >= 2: - self.add_issue(leaf, 391, 'Blank line at end of file') - - def add_issue(self, node, code, message): - if self._previous_leaf is not None: - if search_ancestor(self._previous_leaf, 'error_node') is not None: - return - if self._previous_leaf.type == 'error_leaf': - return - if search_ancestor(node, 'error_node') is not None: - return - if code in (901, 903): - # 901 and 903 are raised by the ErrorFinder. - super(PEP8Normalizer, self).add_issue(node, code, message) - else: - # Skip ErrorFinder here, because it has custom behavior. - super(ErrorFinder, self).add_issue(node, code, message) - - -class PEP8NormalizerConfig(ErrorFinderConfig): - normalizer_class = PEP8Normalizer - """ - Normalizing to PEP8. Not really implemented, yet. - """ - def __init__(self, indentation=' ' * 4, hanging_indentation=None, - max_characters=79, spaces_before_comment=2): - self.indentation = indentation - if hanging_indentation is None: - hanging_indentation = indentation - self.hanging_indentation = hanging_indentation - self.closing_bracket_hanging_indentation = '' - self.break_after_binary = False - self.max_characters = max_characters - self.spaces_before_comment = spaces_before_comment - - -# TODO this is not yet ready. -#@PEP8Normalizer.register_rule(type='endmarker') -class BlankLineAtEnd(Rule): - code = 392 - message = 'Blank line at end of file' - - def is_issue(self, leaf): - return self._newline_count >= 2 diff --git a/pythonFiles/parso/python/prefix.py b/pythonFiles/parso/python/prefix.py deleted file mode 100644 index b7f1e1bc4db9..000000000000 --- a/pythonFiles/parso/python/prefix.py +++ /dev/null @@ -1,97 +0,0 @@ -import re -from codecs import BOM_UTF8 - -from parso.python.tokenize import group - -unicode_bom = BOM_UTF8.decode('utf-8') - - -class PrefixPart(object): - def __init__(self, leaf, typ, value, spacing='', start_pos=None): - assert start_pos is not None - self.parent = leaf - self.type = typ - self.value = value - self.spacing = spacing - self.start_pos = start_pos - - @property - def end_pos(self): - if self.value.endswith('\n'): - return self.start_pos[0] + 1, 0 - if self.value == unicode_bom: - # The bom doesn't have a length at the start of a Python file. 
- return self.start_pos - return self.start_pos[0], self.start_pos[1] + len(self.value) - - def create_spacing_part(self): - column = self.start_pos[1] - len(self.spacing) - return PrefixPart( - self.parent, 'spacing', self.spacing, - start_pos=(self.start_pos[0], column) - ) - - def __repr__(self): - return '%s(%s, %s, %s)' % ( - self.__class__.__name__, - self.type, - repr(self.value), - self.start_pos - ) - - -_comment = r'#[^\n\r\f]*' -_backslash = r'\\\r?\n' -_newline = r'\r?\n' -_form_feed = r'\f' -_only_spacing = '$' -_spacing = r'[ \t]*' -_bom = unicode_bom - -_regex = group( - _comment, _backslash, _newline, _form_feed, _only_spacing, _bom, - capture=True -) -_regex = re.compile(group(_spacing, capture=True) + _regex) - - -_types = { - '#': 'comment', - '\\': 'backslash', - '\f': 'formfeed', - '\n': 'newline', - '\r': 'newline', - unicode_bom: 'bom' -} - - -def split_prefix(leaf, start_pos): - line, column = start_pos - start = 0 - value = spacing = '' - bom = False - while start != len(leaf.prefix): - match =_regex.match(leaf.prefix, start) - spacing = match.group(1) - value = match.group(2) - if not value: - break - type_ = _types[value[0]] - yield PrefixPart( - leaf, type_, value, spacing, - start_pos=(line, column + start - int(bom) + len(spacing)) - ) - if type_ == 'bom': - bom = True - - start = match.end(0) - if value.endswith('\n'): - line += 1 - column = -start - - if value: - spacing = '' - yield PrefixPart( - leaf, 'spacing', spacing, - start_pos=(line, column + start) - ) diff --git a/pythonFiles/parso/python/token.py b/pythonFiles/parso/python/token.py deleted file mode 100644 index dd849b01daa7..000000000000 --- a/pythonFiles/parso/python/token.py +++ /dev/null @@ -1,113 +0,0 @@ -from __future__ import absolute_import -from itertools import count -from token import * - -from parso._compatibility import py_version - - -_counter = count(N_TOKENS) -# Never want to see this thing again. -del N_TOKENS - -COMMENT = next(_counter) -tok_name[COMMENT] = 'COMMENT' - -NL = next(_counter) -tok_name[NL] = 'NL' - -# Sets the attributes that don't exist in these tok_name versions. -if py_version >= 30: - BACKQUOTE = next(_counter) - tok_name[BACKQUOTE] = 'BACKQUOTE' -else: - RARROW = next(_counter) - tok_name[RARROW] = 'RARROW' - ELLIPSIS = next(_counter) - tok_name[ELLIPSIS] = 'ELLIPSIS' - -if py_version < 35: - ATEQUAL = next(_counter) - tok_name[ATEQUAL] = 'ATEQUAL' - -ERROR_DEDENT = next(_counter) -tok_name[ERROR_DEDENT] = 'ERROR_DEDENT' - -FSTRING_START = next(_counter) -tok_name[FSTRING_START] = 'FSTRING_START' -FSTRING_END = next(_counter) -tok_name[FSTRING_END] = 'FSTRING_END' -FSTRING_STRING = next(_counter) -tok_name[FSTRING_STRING] = 'FSTRING_STRING' -EXCLAMATION = next(_counter) -tok_name[EXCLAMATION] = 'EXCLAMATION' - -# Map from operator to number (since tokenize doesn't do this) - -opmap_raw = """\ -( LPAR -) RPAR -[ LSQB -] RSQB -: COLON -, COMMA -; SEMI -+ PLUS -- MINUS -* STAR -/ SLASH -| VBAR -& AMPER -< LESS -> GREATER -= EQUAL -. DOT -% PERCENT -` BACKQUOTE -{ LBRACE -} RBRACE -@ AT -== EQEQUAL -!= NOTEQUAL -<> NOTEQUAL -<= LESSEQUAL ->= GREATEREQUAL -~ TILDE -^ CIRCUMFLEX -<< LEFTSHIFT ->> RIGHTSHIFT -** DOUBLESTAR -+= PLUSEQUAL --= MINEQUAL -*= STAREQUAL -/= SLASHEQUAL -%= PERCENTEQUAL -&= AMPEREQUAL -|= VBAREQUAL -@= ATEQUAL -^= CIRCUMFLEXEQUAL -<<= LEFTSHIFTEQUAL ->>= RIGHTSHIFTEQUAL -**= DOUBLESTAREQUAL -// DOUBLESLASH -//= DOUBLESLASHEQUAL --> RARROW -... ELLIPSIS -! 
EXCLAMATION -""" - -opmap = {} -for line in opmap_raw.splitlines(): - op, name = line.split() - opmap[op] = globals()[name] - - -def generate_token_id(string): - """ - Uses a token in the grammar (e.g. `'+'` or `'and'`returns the corresponding - ID for it. The strings are part of the grammar file. - """ - try: - return opmap[string] - except KeyError: - pass - return globals()[string] diff --git a/pythonFiles/parso/python/tokenize.py b/pythonFiles/parso/python/tokenize.py deleted file mode 100644 index 31f081d9b804..000000000000 --- a/pythonFiles/parso/python/tokenize.py +++ /dev/null @@ -1,602 +0,0 @@ -# -*- coding: utf-8 -*- -""" -This tokenizer has been copied from the ``tokenize.py`` standard library -tokenizer. The reason was simple: The standard library tokenizer fails -if the indentation is not right. To make it possible to do error recovery the - tokenizer needed to be rewritten. - -Basically this is a stripped down version of the standard library module, so -you can read the documentation there. Additionally we included some speed and -memory optimizations here. -""" -from __future__ import absolute_import - -import sys -import string -import re -from collections import namedtuple -import itertools as _itertools -from codecs import BOM_UTF8 - -from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap, - NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT, - ERROR_DEDENT, FSTRING_STRING, FSTRING_START, - FSTRING_END) -from parso._compatibility import py_version -from parso.utils import split_lines - - -TokenCollection = namedtuple( - 'TokenCollection', - 'pseudo_token single_quoted triple_quoted endpats fstring_pattern_map always_break_tokens', -) - -BOM_UTF8_STRING = BOM_UTF8.decode('utf-8') - -_token_collection_cache = {} - -if py_version >= 30: - # Python 3 has str.isidentifier() to check if a char is a valid identifier - is_identifier = str.isidentifier -else: - namechars = string.ascii_letters + '_' - is_identifier = lambda s: s in namechars - - -def group(*choices, **kwargs): - capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :( - assert not kwargs - - start = '(' - if not capture: - start += '?:' - return start + '|'.join(choices) + ')' - - -def maybe(*choices): - return group(*choices) + '?' - - -# Return the empty string, plus all of the valid string prefixes. -def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False): - def different_case_versions(prefix): - for s in _itertools.product(*[(c, c.upper()) for c in prefix]): - yield ''.join(s) - # The valid string prefixes. Only contain the lower case versions, - # and don't contain any permuations (include 'fr', but not - # 'rf'). The various permutations will be generated. - valid_string_prefixes = ['b', 'r', 'u'] - if version_info >= (3, 0): - valid_string_prefixes.append('br') - - result = set(['']) - if version_info >= (3, 6) and include_fstring: - f = ['f', 'fr'] - if only_fstring: - valid_string_prefixes = f - result = set() - else: - valid_string_prefixes += f - elif only_fstring: - return set() - - # if we add binary f-strings, add: ['fb', 'fbr'] - for prefix in valid_string_prefixes: - for t in _itertools.permutations(prefix): - # create a list with upper and lower versions of each - # character - result.update(different_case_versions(t)) - if version_info <= (2, 7): - # In Python 2 the order cannot just be random. 
- result.update(different_case_versions('ur')) - result.update(different_case_versions('br')) - return result - - -def _compile(expr): - return re.compile(expr, re.UNICODE) - - -def _get_token_collection(version_info): - try: - return _token_collection_cache[tuple(version_info)] - except KeyError: - _token_collection_cache[tuple(version_info)] = result = \ - _create_token_collection(version_info) - return result - - -fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+') -fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+') - - -def _create_token_collection(version_info): - # Note: we use unicode matching for names ("\w") but ascii matching for - # number literals. - Whitespace = r'[ \f\t]*' - Comment = r'#[^\r\n]*' - Name = r'\w+' - - if version_info >= (3, 6): - Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' - Binnumber = r'0[bB](?:_?[01])+' - Octnumber = r'0[oO](?:_?[0-7])+' - Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' - Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) - Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' - Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', - r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) - Expfloat = r'[0-9](?:_?[0-9])*' + Exponent - Floatnumber = group(Pointfloat, Expfloat) - Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') - else: - Hexnumber = r'0[xX][0-9a-fA-F]+' - Binnumber = r'0[bB][01]+' - if version_info >= (3, 0): - Octnumber = r'0[oO][0-7]+' - else: - Octnumber = '0[oO]?[0-7]+' - Decnumber = r'(?:0+|[1-9][0-9]*)' - Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) - Exponent = r'[eE][-+]?[0-9]+' - Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) - Expfloat = r'[0-9]+' + Exponent - Floatnumber = group(Pointfloat, Expfloat) - Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') - Number = group(Imagnumber, Floatnumber, Intnumber) - - # Note that since _all_string_prefixes includes the empty string, - # StringPrefix can be the empty string (making it optional). - possible_prefixes = _all_string_prefixes(version_info) - StringPrefix = group(*possible_prefixes) - StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True)) - fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True) - FStringStart = group(*fstring_prefixes) - - # Tail end of ' string. - Single = r"[^'\\]*(?:\\.[^'\\]*)*'" - # Tail end of " string. - Double = r'[^"\\]*(?:\\.[^"\\]*)*"' - # Tail end of ''' string. - Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" - # Tail end of """ string. - Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' - Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""') - - # Because of leftmost-then-longest match semantics, be sure to put the - # longest operators first (e.g., if = came before ==, == would get - # recognized as two instances of =). - Operator = group(r"\*\*=?", r">>=?", r"<<=?", - r"//=?", r"->", - r"[+\-*/%&@`|^!=<>]=?", - r"~") - - Bracket = '[][(){}]' - - special_args = [r'\r?\n', r'[:;.,@]'] - if version_info >= (3, 0): - special_args.insert(0, r'\.\.\.') - Special = group(*special_args) - - Funny = group(Operator, Bracket, Special) - - # First (or only) line of ' or " string. 
- ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + - group("'", r'\\\r?\n'), - StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + - group('"', r'\\\r?\n')) - pseudo_extra_pool = [Comment, Triple] - all_quotes = '"', "'", '"""', "'''" - if fstring_prefixes: - pseudo_extra_pool.append(FStringStart + group(*all_quotes)) - - PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool) - PseudoToken = group(Whitespace, capture=True) + \ - group(PseudoExtras, Number, Funny, ContStr, Name, capture=True) - - # For a given string prefix plus quotes, endpats maps it to a regex - # to match the remainder of that string. _prefix can be empty, for - # a normal single or triple quoted string (with no prefix). - endpats = {} - for _prefix in possible_prefixes: - endpats[_prefix + "'"] = _compile(Single) - endpats[_prefix + '"'] = _compile(Double) - endpats[_prefix + "'''"] = _compile(Single3) - endpats[_prefix + '"""'] = _compile(Double3) - - # A set of all of the single and triple quoted string prefixes, - # including the opening quotes. - single_quoted = set() - triple_quoted = set() - fstring_pattern_map = {} - for t in possible_prefixes: - for quote in '"', "'": - single_quoted.add(t + quote) - - for quote in '"""', "'''": - triple_quoted.add(t + quote) - - for t in fstring_prefixes: - for quote in all_quotes: - fstring_pattern_map[t + quote] = quote - - ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except', - 'finally', 'while', 'with', 'return') - pseudo_token_compiled = _compile(PseudoToken) - return TokenCollection( - pseudo_token_compiled, single_quoted, triple_quoted, endpats, - fstring_pattern_map, ALWAYS_BREAK_TOKENS - ) - - -class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])): - @property - def end_pos(self): - lines = split_lines(self.string) - if len(lines) > 1: - return self.start_pos[0] + len(lines) - 1, 0 - else: - return self.start_pos[0], self.start_pos[1] + len(self.string) - - -class PythonToken(Token): - def _get_type_name(self, exact=True): - return tok_name[self.type] - - def __repr__(self): - return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' % - self._replace(type=self._get_type_name())) - - -class FStringNode(object): - def __init__(self, quote): - self.quote = quote - self.parentheses_count = 0 - self.previous_lines = '' - self.last_string_start_pos = None - # In the syntax there can be multiple format_spec's nested: - # {x:{y:3}} - self.format_spec_count = 0 - - def open_parentheses(self, character): - self.parentheses_count += 1 - - def close_parentheses(self, character): - self.parentheses_count -= 1 - - def allow_multiline(self): - return len(self.quote) == 3 - - def is_in_expr(self): - return (self.parentheses_count - self.format_spec_count) > 0 - - -def _check_fstring_ending(fstring_stack, token, from_start=False): - fstring_end = float('inf') - fstring_index = None - for i, node in enumerate(fstring_stack): - if from_start: - if token.startswith(node.quote): - fstring_index = i - fstring_end = len(node.quote) - else: - continue - else: - try: - end = token.index(node.quote) - except ValueError: - pass - else: - if fstring_index is None or end < fstring_end: - fstring_index = i - fstring_end = end - return fstring_index, fstring_end - - -def _find_fstring_string(fstring_stack, line, lnum, pos): - tos = fstring_stack[-1] - if tos.is_in_expr(): - return '', pos - else: - new_pos = pos - allow_multiline = tos.allow_multiline() - if allow_multiline: - match = fstring_string_multi_line.match(line, pos) - else: - match 
= fstring_string_single_line.match(line, pos) - if match is None: - string = tos.previous_lines - else: - if not tos.previous_lines: - tos.last_string_start_pos = (lnum, pos) - - string = match.group(0) - for fstring_stack_node in fstring_stack: - try: - string = string[:string.index(fstring_stack_node.quote)] - except ValueError: - pass # The string was not found. - - new_pos += len(string) - if allow_multiline and string.endswith('\n'): - tos.previous_lines += string - string = '' - else: - string = tos.previous_lines + string - - return string, new_pos - - -def tokenize(code, version_info, start_pos=(1, 0)): - """Generate tokens from a the source code (string).""" - lines = split_lines(code, keepends=True) - return tokenize_lines(lines, version_info, start_pos=start_pos) - - -def _print_tokens(func): - """ - A small helper function to help debug the tokenize_lines function. - """ - def wrapper(*args, **kwargs): - for token in func(*args, **kwargs): - print(token) - yield token - - return wrapper - - -# @_print_tokens -def tokenize_lines(lines, version_info, start_pos=(1, 0)): - """ - A heavily modified Python standard library tokenizer. - - Additionally to the default information, yields also the prefix of each - token. This idea comes from lib2to3. The prefix contains all information - that is irrelevant for the parser like newlines in parentheses or comments. - """ - pseudo_token, single_quoted, triple_quoted, endpats, fstring_pattern_map, always_break_tokens, = \ - _get_token_collection(version_info) - paren_level = 0 # count parentheses - indents = [0] - max = 0 - numchars = '0123456789' - contstr = '' - contline = None - # We start with a newline. This makes indent at the first position - # possible. It's not valid Python, but still better than an INDENT in the - # second line (and not in the first). This makes quite a few things in - # Jedi's fast parser possible. - new_line = True - prefix = '' # Should never be required, but here for safety - additional_prefix = '' - first = True - lnum = start_pos[0] - 1 - fstring_stack = [] - for line in lines: # loop over lines in stream - lnum += 1 - pos = 0 - max = len(line) - if first: - if line.startswith(BOM_UTF8_STRING): - additional_prefix = BOM_UTF8_STRING - line = line[1:] - max = len(line) - - # Fake that the part before was already parsed. - line = '^' * start_pos[1] + line - pos = start_pos[1] - max += start_pos[1] - - first = False - - if contstr: # continued string - endmatch = endprog.match(line) - if endmatch: - pos = endmatch.end(0) - yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix) - contstr = '' - contline = None - else: - contstr = contstr + line - contline = contline + line - continue - - while pos < max: - if fstring_stack: - string, pos = _find_fstring_string(fstring_stack, line, lnum, pos) - if string: - yield PythonToken( - FSTRING_STRING, string, - fstring_stack[-1].last_string_start_pos, - # Never has a prefix because it can start anywhere and - # include whitespace. 
- prefix='' - ) - fstring_stack[-1].previous_lines = '' - continue - - if pos == max: - break - - rest = line[pos:] - fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True) - - if fstring_index is not None: - yield PythonToken( - FSTRING_END, - fstring_stack[fstring_index].quote, - (lnum, pos), - prefix=additional_prefix, - ) - additional_prefix = '' - del fstring_stack[fstring_index:] - pos += end - continue - - pseudomatch = pseudo_token.match(line, pos) - if not pseudomatch: # scan for tokens - txt = line[pos:] - if txt.endswith('\n'): - new_line = True - yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix) - additional_prefix = '' - break - - prefix = additional_prefix + pseudomatch.group(1) - additional_prefix = '' - start, pos = pseudomatch.span(2) - spos = (lnum, start) - token = pseudomatch.group(2) - if token == '': - assert prefix - additional_prefix = prefix - # This means that we have a line with whitespace/comments at - # the end, which just results in an endmarker. - break - initial = token[0] - - if new_line and initial not in '\r\n#': - new_line = False - if paren_level == 0 and not fstring_stack: - i = 0 - while line[i] == '\f': - i += 1 - # TODO don't we need to change spos as well? - start -= 1 - if start > indents[-1]: - yield PythonToken(INDENT, '', spos, '') - indents.append(start) - while start < indents[-1]: - if start > indents[-2]: - yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '') - break - yield PythonToken(DEDENT, '', spos, '') - indents.pop() - - if fstring_stack: - fstring_index, end = _check_fstring_ending(fstring_stack, token) - if fstring_index is not None: - if end != 0: - yield PythonToken(ERRORTOKEN, token[:end], spos, prefix) - prefix = '' - - yield PythonToken( - FSTRING_END, - fstring_stack[fstring_index].quote, - (lnum, spos[1] + 1), - prefix=prefix - ) - del fstring_stack[fstring_index:] - pos -= len(token) - end - continue - - if (initial in numchars or # ordinary number - (initial == '.' and token != '.' and token != '...')): - yield PythonToken(NUMBER, token, spos, prefix) - elif initial in '\r\n': - if any(not f.allow_multiline() for f in fstring_stack): - # Would use fstring_stack.clear, but that's not available - # in Python 2. - fstring_stack[:] = [] - - if not new_line and paren_level == 0 and not fstring_stack: - yield PythonToken(NEWLINE, token, spos, prefix) - else: - additional_prefix = prefix + token - new_line = True - elif initial == '#': # Comments - assert not token.endswith("\n") - additional_prefix = prefix + token - elif token in triple_quoted: - endprog = endpats[token] - endmatch = endprog.match(line, pos) - if endmatch: # all on one line - pos = endmatch.end(0) - token = line[start:pos] - yield PythonToken(STRING, token, spos, prefix) - else: - contstr_start = (lnum, start) # multiple lines - contstr = line[start:] - contline = line - break - elif initial in single_quoted or \ - token[:2] in single_quoted or \ - token[:3] in single_quoted: - if token[-1] == '\n': # continued string - contstr_start = lnum, start - endprog = (endpats.get(initial) or endpats.get(token[1]) - or endpats.get(token[2])) - contstr = line[start:] - contline = line - break - else: # ordinary string - yield PythonToken(STRING, token, spos, prefix) - elif token in fstring_pattern_map: # The start of an fstring. 
- fstring_stack.append(FStringNode(fstring_pattern_map[token])) - yield PythonToken(FSTRING_START, token, spos, prefix) - elif is_identifier(initial): # ordinary name - if token in always_break_tokens: - fstring_stack[:] = [] - paren_level = 0 - while True: - indent = indents.pop() - if indent > start: - yield PythonToken(DEDENT, '', spos, '') - else: - indents.append(indent) - break - yield PythonToken(NAME, token, spos, prefix) - elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt - additional_prefix += prefix + line[start:] - break - else: - if token in '([{': - if fstring_stack: - fstring_stack[-1].open_parentheses(token) - else: - paren_level += 1 - elif token in ')]}': - if fstring_stack: - fstring_stack[-1].close_parentheses(token) - else: - paren_level -= 1 - elif token == ':' and fstring_stack \ - and fstring_stack[-1].parentheses_count == 1: - fstring_stack[-1].format_spec_count += 1 - - try: - # This check is needed in any case to check if it's a valid - # operator or just some random unicode character. - typ = opmap[token] - except KeyError: - typ = ERRORTOKEN - yield PythonToken(typ, token, spos, prefix) - - if contstr: - yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix) - if contstr.endswith('\n'): - new_line = True - - end_pos = lnum, max - # As the last position we just take the maximally possible position. We - # remove -1 for the last new line. - for indent in indents[1:]: - yield PythonToken(DEDENT, '', end_pos, '') - yield PythonToken(ENDMARKER, '', end_pos, additional_prefix) - - -if __name__ == "__main__": - if len(sys.argv) >= 2: - path = sys.argv[1] - with open(path) as f: - code = f.read() - else: - code = sys.stdin.read() - - from parso.utils import python_bytes_to_unicode, parse_version_string - - if isinstance(code, bytes): - code = python_bytes_to_unicode(code) - - for token in tokenize(code, parse_version_string()): - print(token) diff --git a/pythonFiles/parso/python/tree.py b/pythonFiles/parso/python/tree.py deleted file mode 100644 index e2bf010bdff0..000000000000 --- a/pythonFiles/parso/python/tree.py +++ /dev/null @@ -1,1192 +0,0 @@ -""" -This is the syntax tree for Python syntaxes (2 & 3). The classes represent -syntax elements like functions and imports. - -All of the nodes can be traced back to the `Python grammar file -`_. If you want to know how -a tree is structured, just analyse that file (for each Python version it's a -bit different). - -There's a lot of logic here that makes it easier for Jedi (and other libraries) -to deal with a Python syntax tree. - -By using :py:meth:`parso.tree.NodeOrLeaf.get_code` on a module, you can get -back the 1-to-1 representation of the input given to the parser. This is -important if you want to refactor a parser tree. - ->>> from parso import parse ->>> parser = parse('import os') ->>> module = parser.get_root_node() ->>> module - - -Any subclasses of :class:`Scope`, including :class:`Module` has an attribute -:attr:`iter_imports `: - ->>> list(module.iter_imports()) -[] - -Changes to the Python Grammar ------------------------------ - -A few things have changed when looking at Python grammar files: - -- :class:`Param` does not exist in Python grammar files. It is essentially a - part of a ``parameters`` node. |parso| splits it up to make it easier to - analyse parameters. However this just makes it easier to deal with the syntax - tree, it doesn't actually change the valid syntax. 
-- A few nodes like `lambdef` and `lambdef_nocond` have been merged in the - syntax tree to make it easier to do deal with them. - -Parser Tree Classes -------------------- -""" - -import re - -from parso._compatibility import utf8_repr, unicode -from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \ - search_ancestor -from parso.python.prefix import split_prefix - -_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', - 'with_stmt', 'async_stmt', 'suite']) -_RETURN_STMT_CONTAINERS = set(['suite', 'simple_stmt']) | _FLOW_CONTAINERS -_FUNC_CONTAINERS = set(['suite', 'simple_stmt', 'decorated']) | _FLOW_CONTAINERS -_GET_DEFINITION_TYPES = set([ - 'expr_stmt', 'comp_for', 'with_stmt', 'for_stmt', 'import_name', - 'import_from', 'param' -]) -_IMPORTS = set(['import_name', 'import_from']) - - - -class DocstringMixin(object): - __slots__ = () - - def get_doc_node(self): - """ - Returns the string leaf of a docstring. e.g. ``r'''foo'''``. - """ - if self.type == 'file_input': - node = self.children[0] - elif self.type in ('funcdef', 'classdef'): - node = self.children[self.children.index(':') + 1] - if node.type == 'suite': # Normally a suite - node = node.children[1] # -> NEWLINE stmt - else: # ExprStmt - simple_stmt = self.parent - c = simple_stmt.parent.children - index = c.index(simple_stmt) - if not index: - return None - node = c[index - 1] - - if node.type == 'simple_stmt': - node = node.children[0] - if node.type == 'string': - return node - return None - - -class PythonMixin(object): - """ - Some Python specific utitilies. - """ - __slots__ = () - - def get_name_of_position(self, position): - """ - Given a (line, column) tuple, returns a :py:class:`Name` or ``None`` if - there is no name at that position. - """ - for c in self.children: - if isinstance(c, Leaf): - if c.type == 'name' and c.start_pos <= position <= c.end_pos: - return c - else: - result = c.get_name_of_position(position) - if result is not None: - return result - return None - - -class PythonLeaf(PythonMixin, Leaf): - __slots__ = () - - def _split_prefix(self): - return split_prefix(self, self.get_start_pos_of_prefix()) - - def get_start_pos_of_prefix(self): - """ - Basically calls :py:meth:`parso.tree.NodeOrLeaf.get_start_pos_of_prefix`. - """ - # TODO it is really ugly that we have to override it. Maybe change - # indent error leafs somehow? No idea how, though. - previous_leaf = self.get_previous_leaf() - if previous_leaf is not None and previous_leaf.type == 'error_leaf' \ - and previous_leaf.original_type in ('indent', 'error_dedent'): - previous_leaf = previous_leaf.get_previous_leaf() - - if previous_leaf is None: - return self.line - self.prefix.count('\n'), 0 # It's the first leaf. - return previous_leaf.end_pos - - - -class _LeafWithoutNewlines(PythonLeaf): - """ - Simply here to optimize performance. 
- """ - __slots__ = () - - @property - def end_pos(self): - return self.line, self.column + len(self.value) - - -# Python base classes -class PythonBaseNode(PythonMixin, BaseNode): - __slots__ = () - - -class PythonNode(PythonMixin, Node): - __slots__ = () - - -class PythonErrorNode(PythonMixin, ErrorNode): - __slots__ = () - - -class PythonErrorLeaf(ErrorLeaf, PythonLeaf): - __slots__ = () - - -class EndMarker(_LeafWithoutNewlines): - __slots__ = () - type = 'endmarker' - - -class Newline(PythonLeaf): - """Contains NEWLINE and ENDMARKER tokens.""" - __slots__ = () - type = 'newline' - - @utf8_repr - def __repr__(self): - return "<%s: %s>" % (type(self).__name__, repr(self.value)) - - -class Name(_LeafWithoutNewlines): - """ - A string. Sometimes it is important to know if the string belongs to a name - or not. - """ - type = 'name' - __slots__ = () - - def __repr__(self): - return "<%s: %s@%s,%s>" % (type(self).__name__, self.value, - self.line, self.column) - - def is_definition(self): - """ - Returns True if the name is being defined. - """ - return self.get_definition() is not None - - def get_definition(self, import_name_always=False): - """ - Returns None if there's on definition for a name. - - :param import_name_alway: Specifies if an import name is always a - definition. Normally foo in `from foo import bar` is not a - definition. - """ - node = self.parent - type_ = node.type - if type_ in ('power', 'atom_expr'): - # In `self.x = 3` self is not a definition, but x is. - return None - - if type_ in ('funcdef', 'classdef'): - if self == node.name: - return node - return None - - if type_ == 'except_clause': - # TODO in Python 2 this doesn't work correctly. See grammar file. - # I think we'll just let it be. Python 2 will be gone in a few - # years. - if self.get_previous_sibling() == 'as': - return node.parent # The try_stmt. - return None - - while node is not None: - if node.type == 'suite': - return None - if node.type in _GET_DEFINITION_TYPES: - if self in node.get_defined_names(): - return node - if import_name_always and node.type in _IMPORTS: - return node - return None - node = node.parent - return None - - - -class Literal(PythonLeaf): - __slots__ = () - - -class Number(Literal): - type = 'number' - __slots__ = () - - -class String(Literal): - type = 'string' - __slots__ = () - - @property - def string_prefix(self): - return re.match('\w*(?=[\'"])', self.value).group(0) - - def _get_payload(self): - match = re.search( - r'''('{3}|"{3}|'|")(.*)$''', - self.value, - flags=re.DOTALL - ) - return match.group(2)[:-len(match.group(1))] - - -class FStringString(Leaf): - """ - f-strings contain f-string expressions and normal python strings. These are - the string parts of f-strings. - """ - type = 'fstring_string' - __slots__ = () - - -class FStringStart(Leaf): - """ - f-strings contain f-string expressions and normal python strings. These are - the string parts of f-strings. - """ - type = 'fstring_start' - __slots__ = () - - -class FStringEnd(Leaf): - """ - f-strings contain f-string expressions and normal python strings. These are - the string parts of f-strings. - """ - type = 'fstring_end' - __slots__ = () - - -class _StringComparisonMixin(object): - def __eq__(self, other): - """ - Make comparisons with strings easy. - Improves the readability of the parser. 
- """ - if isinstance(other, (str, unicode)): - return self.value == other - - return self is other - - def __ne__(self, other): - """Python 2 compatibility.""" - return not self.__eq__(other) - - def __hash__(self): - return hash(self.value) - - -class Operator(_LeafWithoutNewlines, _StringComparisonMixin): - type = 'operator' - __slots__ = () - - -class Keyword(_LeafWithoutNewlines, _StringComparisonMixin): - type = 'keyword' - __slots__ = () - - -class Scope(PythonBaseNode, DocstringMixin): - """ - Super class for the parser tree, which represents the state of a python - text file. - A Scope is either a function, class or lambda. - """ - __slots__ = () - - def __init__(self, children): - super(Scope, self).__init__(children) - - def iter_funcdefs(self): - """ - Returns a generator of `funcdef` nodes. - """ - return self._search_in_scope('funcdef') - - def iter_classdefs(self): - """ - Returns a generator of `classdef` nodes. - """ - return self._search_in_scope('classdef') - - def iter_imports(self): - """ - Returns a generator of `import_name` and `import_from` nodes. - """ - return self._search_in_scope('import_name', 'import_from') - - def _search_in_scope(self, *names): - def scan(children): - for element in children: - if element.type in names: - yield element - if element.type in _FUNC_CONTAINERS: - for e in scan(element.children): - yield e - - return scan(self.children) - - def get_suite(self): - """ - Returns the part that is executed by the function. - """ - return self.children[-1] - - def __repr__(self): - try: - name = self.name.value - except AttributeError: - name = '' - - return "<%s: %s@%s-%s>" % (type(self).__name__, name, - self.start_pos[0], self.end_pos[0]) - - -class Module(Scope): - """ - The top scope, which is always a module. - Depending on the underlying parser this may be a full module or just a part - of a module. - """ - __slots__ = ('_used_names',) - type = 'file_input' - - def __init__(self, children): - super(Module, self).__init__(children) - self._used_names = None - - def _iter_future_import_names(self): - """ - :return: A list of future import names. - :rtype: list of str - """ - # In Python it's not allowed to use future imports after the first - # actual (non-future) statement. However this is not a linter here, - # just return all future imports. If people want to scan for issues - # they should use the API. - for imp in self.iter_imports(): - if imp.type == 'import_from' and imp.level == 0: - for path in imp.get_paths(): - names = [name.value for name in path] - if len(names) == 2 and names[0] == '__future__': - yield names[1] - - def _has_explicit_absolute_import(self): - """ - Checks if imports in this module are explicitly absolute, i.e. there - is a ``__future__`` import. - Currently not public, might be in the future. - :return bool: - """ - for name in self._iter_future_import_names(): - if name == 'absolute_import': - return True - return False - - def get_used_names(self): - """ - Returns all the :class:`Name` leafs that exist in this module. This - includes both definitions and references of names. - """ - if self._used_names is None: - # Don't directly use self._used_names to eliminate a lookup. 
- dct = {} - - def recurse(node): - try: - children = node.children - except AttributeError: - if node.type == 'name': - arr = dct.setdefault(node.value, []) - arr.append(node) - else: - for child in children: - recurse(child) - - recurse(self) - self._used_names = dct - return self._used_names - - -class Decorator(PythonBaseNode): - type = 'decorator' - __slots__ = () - - -class ClassOrFunc(Scope): - __slots__ = () - - @property - def name(self): - """ - Returns the `Name` leaf that defines the function or class name. - """ - return self.children[1] - - def get_decorators(self): - """ - :rtype: list of :class:`Decorator` - """ - decorated = self.parent - if decorated.type == 'decorated': - if decorated.children[0].type == 'decorators': - return decorated.children[0].children - else: - return decorated.children[:1] - else: - return [] - - -class Class(ClassOrFunc): - """ - Used to store the parsed contents of a python class. - """ - type = 'classdef' - __slots__ = () - - def __init__(self, children): - super(Class, self).__init__(children) - - def get_super_arglist(self): - """ - Returns the `arglist` node that defines the super classes. It returns - None if there are no arguments. - """ - if self.children[2] != '(': # Has no parentheses - return None - else: - if self.children[3] == ')': # Empty parentheses - return None - else: - return self.children[3] - - -def _create_params(parent, argslist_list): - """ - `argslist_list` is a list that can contain an argslist as a first item, but - most not. It's basically the items between the parameter brackets (which is - at most one item). - This function modifies the parser structure. It generates `Param` objects - from the normal ast. Those param objects do not exist in a normal ast, but - make the evaluation of the ast tree so much easier. - You could also say that this function replaces the argslist node with a - list of Param objects. - """ - def check_python2_nested_param(node): - """ - Python 2 allows params to look like ``def x(a, (b, c))``, which is - basically a way of unpacking tuples in params. Python 3 has ditched - this behavior. Jedi currently just ignores those constructs. - """ - return node.type == 'fpdef' and node.children[0] == '(' - - try: - first = argslist_list[0] - except IndexError: - return [] - - if first.type in ('name', 'fpdef'): - if check_python2_nested_param(first): - return [first] - else: - return [Param([first], parent)] - elif first == '*': - return [first] - else: # argslist is a `typedargslist` or a `varargslist`. - if first.type == 'tfpdef': - children = [first] - else: - children = first.children - new_children = [] - start = 0 - # Start with offset 1, because the end is higher. - for end, child in enumerate(children + [None], 1): - if child is None or child == ',': - param_children = children[start:end] - if param_children: # Could as well be comma and then end. - if param_children[0] == '*' and param_children[1] == ',' \ - or check_python2_nested_param(param_children[0]): - for p in param_children: - p.parent = parent - new_children += param_children - else: - new_children.append(Param(param_children, parent)) - start = end - return new_children - - -class Function(ClassOrFunc): - """ - Used to store the parsed contents of a python function. - - Children:: - - 0. - 1. - 2. parameter list (including open-paren and close-paren s) - 3. or 5. - 4. or 6. Node() representing function body - 3. -> (if annotation is also present) - 4. 
annotation (if present) - """ - type = 'funcdef' - - def __init__(self, children): - super(Function, self).__init__(children) - parameters = self.children[2] # After `def foo` - parameters.children[1:-1] = _create_params(parameters, parameters.children[1:-1]) - - def _get_param_nodes(self): - return self.children[2].children - - def get_params(self): - """ - Returns a list of `Param()`. - """ - return [p for p in self._get_param_nodes() if p.type == 'param'] - - @property - def name(self): - return self.children[1] # First token after `def` - - def iter_yield_exprs(self): - """ - Returns a generator of `yield_expr`. - """ - def scan(children): - for element in children: - if element.type in ('classdef', 'funcdef', 'lambdef'): - continue - - try: - nested_children = element.children - except AttributeError: - if element.value == 'yield': - if element.parent.type == 'yield_expr': - yield element.parent - else: - yield element - else: - for result in scan(nested_children): - yield result - - return scan(self.children) - - def iter_return_stmts(self): - """ - Returns a generator of `return_stmt`. - """ - def scan(children): - for element in children: - if element.type == 'return_stmt' \ - or element.type == 'keyword' and element.value == 'return': - yield element - if element.type in _RETURN_STMT_CONTAINERS: - for e in scan(element.children): - yield e - - return scan(self.children) - - def iter_raise_stmts(self): - """ - Returns a generator of `raise_stmt`. Includes raise statements inside try-except blocks - """ - def scan(children): - for element in children: - if element.type == 'raise_stmt' \ - or element.type == 'keyword' and element.value == 'raise': - yield element - if element.type in _RETURN_STMT_CONTAINERS: - for e in scan(element.children): - yield e - - return scan(self.children) - - def is_generator(self): - """ - :return bool: Checks if a function is a generator or not. - """ - return next(self.iter_yield_exprs(), None) is not None - - @property - def annotation(self): - """ - Returns the test node after `->` or `None` if there is no annotation. - """ - try: - if self.children[3] == "->": - return self.children[4] - assert self.children[3] == ":" - return None - except IndexError: - return None - -class Lambda(Function): - """ - Lambdas are basically trimmed functions, so give it the same interface. - - Children:: - - 0. - *. for each argument x - -2. - -1. Node() representing body - """ - type = 'lambdef' - __slots__ = () - - def __init__(self, children): - # We don't want to call the Function constructor, call its parent. - super(Function, self).__init__(children) - # Everything between `lambda` and the `:` operator is a parameter. - self.children[1:-2] = _create_params(self, self.children[1:-2]) - - @property - def name(self): - """ - Raises an AttributeError. Lambdas don't have a defined name. - """ - raise AttributeError("lambda is not named.") - - def _get_param_nodes(self): - return self.children[1:-2] - - @property - def annotation(self): - """ - Returns `None`, lambdas don't have annotations. - """ - return None - - def __repr__(self): - return "<%s@%s>" % (self.__class__.__name__, self.start_pos) - - -class Flow(PythonBaseNode): - __slots__ = () - - -class IfStmt(Flow): - type = 'if_stmt' - __slots__ = () - - def get_test_nodes(self): - """ - E.g. 
returns all the `test` nodes that are named as x, below: - - if x: - pass - elif x: - pass - """ - for i, c in enumerate(self.children): - if c in ('elif', 'if'): - yield self.children[i + 1] - - def get_corresponding_test_node(self, node): - """ - Searches for the branch in which the node is and returns the - corresponding test node (see function above). However if the node is in - the test node itself and not in the suite return None. - """ - start_pos = node.start_pos - for check_node in reversed(list(self.get_test_nodes())): - if check_node.start_pos < start_pos: - if start_pos < check_node.end_pos: - return None - # In this case the node is within the check_node itself, - # not in the suite - else: - return check_node - - def is_node_after_else(self, node): - """ - Checks if a node is defined after `else`. - """ - for c in self.children: - if c == 'else': - if node.start_pos > c.start_pos: - return True - else: - return False - - -class WhileStmt(Flow): - type = 'while_stmt' - __slots__ = () - - -class ForStmt(Flow): - type = 'for_stmt' - __slots__ = () - - def get_testlist(self): - """ - Returns the input node ``y`` from: ``for x in y:``. - """ - return self.children[3] - - def get_defined_names(self): - return _defined_names(self.children[1]) - - -class TryStmt(Flow): - type = 'try_stmt' - __slots__ = () - - def get_except_clause_tests(self): - """ - Returns the ``test`` nodes found in ``except_clause`` nodes. - Returns ``[None]`` for except clauses without an exception given. - """ - for node in self.children: - if node.type == 'except_clause': - yield node.children[1] - elif node == 'except': - yield None - - -class WithStmt(Flow): - type = 'with_stmt' - __slots__ = () - - def get_defined_names(self): - """ - Returns the a list of `Name` that the with statement defines. The - defined names are set after `as`. - """ - names = [] - for with_item in self.children[1:-2:2]: - # Check with items for 'as' names. - if with_item.type == 'with_item': - names += _defined_names(with_item.children[2]) - return names - - def get_test_node_from_name(self, name): - node = name.parent - if node.type != 'with_item': - raise ValueError('The name is not actually part of a with statement.') - return node.children[0] - - -class Import(PythonBaseNode): - __slots__ = () - - def get_path_for_name(self, name): - """ - The path is the list of names that leads to the searched name. - - :return list of Name: - """ - try: - # The name may be an alias. If it is, just map it back to the name. - name = self._aliases()[name] - except KeyError: - pass - - for path in self.get_paths(): - if name in path: - return path[:path.index(name) + 1] - raise ValueError('Name should be defined in the import itself') - - def is_nested(self): - return False # By default, sub classes may overwrite this behavior - - def is_star_import(self): - return self.children[-1] == '*' - - -class ImportFrom(Import): - type = 'import_from' - __slots__ = () - - def get_defined_names(self): - """ - Returns the a list of `Name` that the import defines. The - defined names are set after `import` or in case an alias - `as` - is - present that name is returned. 
- """ - return [alias or name for name, alias in self._as_name_tuples()] - - def _aliases(self): - """Mapping from alias to its corresponding name.""" - return dict((alias, name) for name, alias in self._as_name_tuples() - if alias is not None) - - def get_from_names(self): - for n in self.children[1:]: - if n not in ('.', '...'): - break - if n.type == 'dotted_name': # from x.y import - return n.children[::2] - elif n == 'import': # from . import - return [] - else: # from x import - return [n] - - @property - def level(self): - """The level parameter of ``__import__``.""" - level = 0 - for n in self.children[1:]: - if n in ('.', '...'): - level += len(n.value) - else: - break - return level - - def _as_name_tuples(self): - last = self.children[-1] - if last == ')': - last = self.children[-2] - elif last == '*': - return # No names defined directly. - - if last.type == 'import_as_names': - as_names = last.children[::2] - else: - as_names = [last] - for as_name in as_names: - if as_name.type == 'name': - yield as_name, None - else: - yield as_name.children[::2] # yields x, y -> ``x as y`` - - def get_paths(self): - """ - The import paths defined in an import statement. Typically an array - like this: ``[, ]``. - - :return list of list of Name: - """ - dotted = self.get_from_names() - - if self.children[-1] == '*': - return [dotted] - return [dotted + [name] for name, alias in self._as_name_tuples()] - - -class ImportName(Import): - """For ``import_name`` nodes. Covers normal imports without ``from``.""" - type = 'import_name' - __slots__ = () - - def get_defined_names(self): - """ - Returns the a list of `Name` that the import defines. The defined names - is always the first name after `import` or in case an alias - `as` - is - present that name is returned. - """ - return [alias or path[0] for path, alias in self._dotted_as_names()] - - @property - def level(self): - """The level parameter of ``__import__``.""" - return 0 # Obviously 0 for imports without from. - - def get_paths(self): - return [path for path, alias in self._dotted_as_names()] - - def _dotted_as_names(self): - """Generator of (list(path), alias) where alias may be None.""" - dotted_as_names = self.children[1] - if dotted_as_names.type == 'dotted_as_names': - as_names = dotted_as_names.children[::2] - else: - as_names = [dotted_as_names] - - for as_name in as_names: - if as_name.type == 'dotted_as_name': - alias = as_name.children[2] - as_name = as_name.children[0] - else: - alias = None - if as_name.type == 'name': - yield [as_name], alias - else: - # dotted_names - yield as_name.children[::2], alias - - def is_nested(self): - """ - This checks for the special case of nested imports, without aliases and - from statement:: - - import foo.bar - """ - return bool([1 for path, alias in self._dotted_as_names() - if alias is None and len(path) > 1]) - - def _aliases(self): - """ - :return list of Name: Returns all the alias - """ - return dict((alias, path[-1]) for path, alias in self._dotted_as_names() - if alias is not None) - - -class KeywordStatement(PythonBaseNode): - """ - For the following statements: `assert`, `del`, `global`, `nonlocal`, - `raise`, `return`, `yield`, `return`, `yield`. - - `pass`, `continue` and `break` are not in there, because they are just - simple keywords and the parser reduces it to a keyword. - """ - __slots__ = () - - @property - def type(self): - """ - Keyword statements start with the keyword and end with `_stmt`. You can - crosscheck this with the Python grammar. 
- """ - return '%s_stmt' % self.keyword - - @property - def keyword(self): - return self.children[0].value - - -class AssertStmt(KeywordStatement): - __slots__ = () - - @property - def assertion(self): - return self.children[1] - - -class GlobalStmt(KeywordStatement): - __slots__ = () - - def get_global_names(self): - return self.children[1::2] - - -class ReturnStmt(KeywordStatement): - __slots__ = () - - -class YieldExpr(PythonBaseNode): - type = 'yield_expr' - __slots__ = () - - -def _defined_names(current): - """ - A helper function to find the defined names in statements, for loops and - list comprehensions. - """ - names = [] - if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist', 'testlist'): - for child in current.children[::2]: - names += _defined_names(child) - elif current.type in ('atom', 'star_expr'): - names += _defined_names(current.children[1]) - elif current.type in ('power', 'atom_expr'): - if current.children[-2] != '**': # Just if there's no operation - trailer = current.children[-1] - if trailer.children[0] == '.': - names.append(trailer.children[1]) - else: - names.append(current) - return names - - -class ExprStmt(PythonBaseNode, DocstringMixin): - type = 'expr_stmt' - __slots__ = () - - def get_defined_names(self): - """ - Returns a list of `Name` defined before the `=` sign. - """ - names = [] - if self.children[1].type == 'annassign': - names = _defined_names(self.children[0]) - return [ - name - for i in range(0, len(self.children) - 2, 2) - if '=' in self.children[i + 1].value - for name in _defined_names(self.children[i]) - ] + names - - def get_rhs(self): - """Returns the right-hand-side of the equals.""" - return self.children[-1] - - def yield_operators(self): - """ - Returns a generator of `+=`, `=`, etc. or None if there is no operation. - """ - first = self.children[1] - if first.type == 'annassign': - if len(first.children) <= 2: - return # No operator is available, it's just PEP 484. - - first = first.children[2] - yield first - - for operator in self.children[3::2]: - yield operator - - -class Param(PythonBaseNode): - """ - It's a helper class that makes business logic with params much easier. The - Python grammar defines no ``param`` node. It defines it in a different way - that is not really suited to working with parameters. - """ - type = 'param' - - def __init__(self, children, parent): - super(Param, self).__init__(children) - self.parent = parent - for child in children: - child.parent = self - - @property - def star_count(self): - """ - Is `0` in case of `foo`, `1` in case of `*foo` or `2` in case of - `**foo`. - """ - first = self.children[0] - if first in ('*', '**'): - return len(first.value) - return 0 - - @property - def default(self): - """ - The default is the test node that appears after the `=`. Is `None` in - case no default is present. - """ - has_comma = self.children[-1] == ',' - try: - if self.children[-2 - int(has_comma)] == '=': - return self.children[-1 - int(has_comma)] - except IndexError: - return None - - @property - def annotation(self): - """ - The default is the test node that appears after `:`. Is `None` in case - no annotation is present. - """ - tfpdef = self._tfpdef() - if tfpdef.type == 'tfpdef': - assert tfpdef.children[1] == ":" - assert len(tfpdef.children) == 3 - annotation = tfpdef.children[2] - return annotation - else: - return None - - def _tfpdef(self): - """ - tfpdef: see e.g. grammar36.txt. 
- """ - offset = int(self.children[0] in ('*', '**')) - return self.children[offset] - - @property - def name(self): - """ - The `Name` leaf of the param. - """ - if self._tfpdef().type == 'tfpdef': - return self._tfpdef().children[0] - else: - return self._tfpdef() - - def get_defined_names(self): - return [self.name] - - @property - def position_index(self): - """ - Property for the positional index of a paramter. - """ - index = self.parent.children.index(self) - try: - keyword_only_index = self.parent.children.index('*') - if index > keyword_only_index: - # Skip the ` *, ` - index -= 2 - except ValueError: - pass - return index - 1 - - def get_parent_function(self): - """ - Returns the function/lambda of a parameter. - """ - return search_ancestor(self, 'funcdef', 'lambdef') - - def get_code(self, include_prefix=True, include_comma=True): - """ - Like all the other get_code functions, but includes the param - `include_comma`. - - :param include_comma bool: If enabled includes the comma in the string output. - """ - if include_comma: - return super(Param, self).get_code(include_prefix) - - children = self.children - if children[-1] == ',': - children = children[:-1] - return self._get_code_for_children( - children, - include_prefix=include_prefix - ) - - def __repr__(self): - default = '' if self.default is None else '=%s' % self.default.get_code() - return '<%s: %s>' % (type(self).__name__, str(self._tfpdef()) + default) - - -class CompFor(PythonBaseNode): - type = 'comp_for' - __slots__ = () - - def get_defined_names(self): - """ - Returns the a list of `Name` that the comprehension defines. - """ - # allow async for - return _defined_names(self.children[self.children.index('for') + 1]) diff --git a/pythonFiles/parso/tree.py b/pythonFiles/parso/tree.py deleted file mode 100644 index 5316795be57c..000000000000 --- a/pythonFiles/parso/tree.py +++ /dev/null @@ -1,363 +0,0 @@ -from abc import abstractmethod, abstractproperty -from parso._compatibility import utf8_repr, encoding, py_version - - -def search_ancestor(node, *node_types): - """ - Recursively looks at the parents of a node and returns the first found node - that matches node_types. Returns ``None`` if no matching node is found. - - :param node: The ancestors of this node will be checked. - :param node_types: type names that are searched for. - :type node_types: tuple of str - """ - while True: - node = node.parent - if node is None or node.type in node_types: - return node - - -class NodeOrLeaf(object): - """ - The base class for nodes and leaves. - """ - __slots__ = () - type = None - ''' - The type is a string that typically matches the types of the grammar file. - ''' - - def get_root_node(self): - """ - Returns the root node of a parser tree. The returned node doesn't have - a parent node like all the other nodes/leaves. - """ - scope = self - while scope.parent is not None: - scope = scope.parent - return scope - - def get_next_sibling(self): - """ - Returns the node immediately following this node in this parent's - children list. If this node does not have a next sibling, it is None - """ - # Can't use index(); we need to test by identity - for i, child in enumerate(self.parent.children): - if child is self: - try: - return self.parent.children[i + 1] - except IndexError: - return None - - def get_previous_sibling(self): - """ - Returns the node immediately preceding this node in this parent's - children list. If this node does not have a previous sibling, it is - None. 
- """ - # Can't use index(); we need to test by identity - for i, child in enumerate(self.parent.children): - if child is self: - if i == 0: - return None - return self.parent.children[i - 1] - - def get_previous_leaf(self): - """ - Returns the previous leaf in the parser tree. - Returns `None` if this is the first element in the parser tree. - """ - node = self - while True: - c = node.parent.children - i = c.index(node) - if i == 0: - node = node.parent - if node.parent is None: - return None - else: - node = c[i - 1] - break - - while True: - try: - node = node.children[-1] - except AttributeError: # A Leaf doesn't have children. - return node - - def get_next_leaf(self): - """ - Returns the next leaf in the parser tree. - Returns None if this is the last element in the parser tree. - """ - node = self - while True: - c = node.parent.children - i = c.index(node) - if i == len(c) - 1: - node = node.parent - if node.parent is None: - return None - else: - node = c[i + 1] - break - - while True: - try: - node = node.children[0] - except AttributeError: # A Leaf doesn't have children. - return node - - @abstractproperty - def start_pos(self): - """ - Returns the starting position of the prefix as a tuple, e.g. `(3, 4)`. - - :return tuple of int: (line, column) - """ - - @abstractproperty - def end_pos(self): - """ - Returns the end position of the prefix as a tuple, e.g. `(3, 4)`. - - :return tuple of int: (line, column) - """ - - @abstractmethod - def get_start_pos_of_prefix(self): - """ - Returns the start_pos of the prefix. This means basically it returns - the end_pos of the last prefix. The `get_start_pos_of_prefix()` of the - prefix `+` in `2 + 1` would be `(1, 1)`, while the start_pos is - `(1, 2)`. - - :return tuple of int: (line, column) - """ - - @abstractmethod - def get_first_leaf(self): - """ - Returns the first leaf of a node or itself if this is a leaf. - """ - - @abstractmethod - def get_last_leaf(self): - """ - Returns the last leaf of a node or itself if this is a leaf. - """ - - @abstractmethod - def get_code(self, include_prefix=True): - """ - Returns the code that was input the input for the parser for this node. - - :param include_prefix: Removes the prefix (whitespace and comments) of - e.g. a statement. - """ - - -class Leaf(NodeOrLeaf): - ''' - Leafs are basically tokens with a better API. Leafs exactly know where they - were defined and what text preceeds them. - ''' - __slots__ = ('value', 'parent', 'line', 'column', 'prefix') - - def __init__(self, value, start_pos, prefix=''): - self.value = value - ''' - :py:func:`str` The value of the current token. - ''' - self.start_pos = start_pos - self.prefix = prefix - ''' - :py:func:`str` Typically a mixture of whitespace and comments. Stuff - that is syntactically irrelevant for the syntax tree. - ''' - self.parent = None - ''' - The parent :class:`BaseNode` of this leaf. - ''' - - @property - def start_pos(self): - return self.line, self.column - - @start_pos.setter - def start_pos(self, value): - self.line = value[0] - self.column = value[1] - - def get_start_pos_of_prefix(self): - previous_leaf = self.get_previous_leaf() - if previous_leaf is None: - return self.line - self.prefix.count('\n'), 0 # It's the first leaf. 
- return previous_leaf.end_pos - - def get_first_leaf(self): - return self - - def get_last_leaf(self): - return self - - def get_code(self, include_prefix=True): - if include_prefix: - return self.prefix + self.value - else: - return self.value - - @property - def end_pos(self): - lines = self.value.split('\n') - end_pos_line = self.line + len(lines) - 1 - # Check for multiline token - if self.line == end_pos_line: - end_pos_column = self.column + len(lines[-1]) - else: - end_pos_column = len(lines[-1]) - return end_pos_line, end_pos_column - - @utf8_repr - def __repr__(self): - value = self.value - if not value: - value = self.type - return "<%s: %s>" % (type(self).__name__, value) - - -class TypedLeaf(Leaf): - __slots__ = ('type',) - def __init__(self, type, value, start_pos, prefix=''): - super(TypedLeaf, self).__init__(value, start_pos, prefix) - self.type = type - - -class BaseNode(NodeOrLeaf): - """ - The super class for all nodes. - A node has children, a type and possibly a parent node. - """ - __slots__ = ('children', 'parent') - type = None - - def __init__(self, children): - for c in children: - c.parent = self - self.children = children - """ - A list of :class:`NodeOrLeaf` child nodes. - """ - self.parent = None - ''' - The parent :class:`BaseNode` of this leaf. - None if this is the root node. - ''' - - @property - def start_pos(self): - return self.children[0].start_pos - - def get_start_pos_of_prefix(self): - return self.children[0].get_start_pos_of_prefix() - - @property - def end_pos(self): - return self.children[-1].end_pos - - def _get_code_for_children(self, children, include_prefix): - if include_prefix: - return "".join(c.get_code() for c in children) - else: - first = children[0].get_code(include_prefix=False) - return first + "".join(c.get_code() for c in children[1:]) - - def get_code(self, include_prefix=True): - return self._get_code_for_children(self.children, include_prefix) - - def get_leaf_for_position(self, position, include_prefixes=False): - """ - Get the :py:class:`parso.tree.Leaf` at ``position`` - - :param tuple position: A position tuple, row, column. Rows start from 1 - :param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls - on whitespace or comments before a leaf - :return: :py:class:`parso.tree.Leaf` at ``position``, or ``None`` - """ - def binary_search(lower, upper): - if lower == upper: - element = self.children[lower] - if not include_prefixes and position < element.start_pos: - # We're on a prefix. 
- return None - # In case we have prefixes, a leaf always matches - try: - return element.get_leaf_for_position(position, include_prefixes) - except AttributeError: - return element - - - index = int((lower + upper) / 2) - element = self.children[index] - if position <= element.end_pos: - return binary_search(lower, index) - else: - return binary_search(index + 1, upper) - - if not ((1, 0) <= position <= self.children[-1].end_pos): - raise ValueError('Please provide a position that exists within this node.') - return binary_search(0, len(self.children) - 1) - - def get_first_leaf(self): - return self.children[0].get_first_leaf() - - def get_last_leaf(self): - return self.children[-1].get_last_leaf() - - @utf8_repr - def __repr__(self): - code = self.get_code().replace('\n', ' ').strip() - if not py_version >= 30: - code = code.encode(encoding, 'replace') - return "<%s: %s@%s,%s>" % \ - (type(self).__name__, code, self.start_pos[0], self.start_pos[1]) - - -class Node(BaseNode): - """Concrete implementation for interior nodes.""" - __slots__ = ('type',) - - def __init__(self, type, children): - super(Node, self).__init__(children) - self.type = type - - def __repr__(self): - return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children) - - -class ErrorNode(BaseNode): - """ - A node that contains valid nodes/leaves that we're follow by a token that - was invalid. This basically means that the leaf after this node is where - Python would mark a syntax error. - """ - __slots__ = () - type = 'error_node' - - -class ErrorLeaf(Leaf): - """ - A leaf that is either completely invalid in a language (like `$` in Python) - or is invalid at that position. Like the star in `1 +* 1`. - """ - __slots__ = ('original_type',) - type = 'error_leaf' - - def __init__(self, original_type, value, start_pos, prefix=''): - super(ErrorLeaf, self).__init__(value, start_pos, prefix) - self.original_type = original_type - - def __repr__(self): - return "<%s: %s:%s, %s>" % \ - (type(self).__name__, self.original_type, repr(self.value), self.start_pos) diff --git a/pythonFiles/parso/utils.py b/pythonFiles/parso/utils.py deleted file mode 100644 index a4801b996616..000000000000 --- a/pythonFiles/parso/utils.py +++ /dev/null @@ -1,156 +0,0 @@ -from collections import namedtuple -import re -import sys -from ast import literal_eval - -from parso._compatibility import unicode, total_ordering - - -Version = namedtuple('Version', 'major, minor, micro') - - -def split_lines(string, keepends=False): - r""" - Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`, - looks at form feeds and other special characters as normal text. Just - splits ``\n`` and ``\r\n``. - Also different: Returns ``[""]`` for an empty string input. - - In Python 2.7 form feeds are used as normal characters when using - str.splitlines. However in Python 3 somewhere there was a decision to split - also on form feeds. - """ - if keepends: - lst = string.splitlines(True) - - # We have to merge lines that were broken by form feed characters. - merge = [] - for i, line in enumerate(lst): - if line.endswith('\f'): - merge.append(i) - - for index in reversed(merge): - try: - lst[index] = lst[index] + lst[index + 1] - del lst[index + 1] - except IndexError: - # index + 1 can be empty and therefore there's no need to - # merge. - pass - - # The stdlib's implementation of the end is inconsistent when calling - # it with/without keepends. One time there's an empty string in the - # end, one time there's none. 
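A sketch of position-based lookup combined with search_ancestor (assuming parso 0.2.x; rows are 1-based and columns 0-based, as documented above):

import parso
from parso.tree import search_ancestor

module = parso.parse('def f(a):\n    return a\n', version='3.6')
leaf = module.get_leaf_for_position((2, 11))
print(leaf.value)                                   # 'a'
print(search_ancestor(leaf, 'funcdef').type)        # 'funcdef'
print(module.get_leaf_for_position((2, 2)))         # None: (2, 2) falls on indentation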
- if string.endswith('\n') or string == '': - lst.append('') - return lst - else: - return re.split('\n|\r\n', string) - - -def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'): - """ - Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a - unicode object like in :py:meth:`bytes.decode`. - - :param encoding: See :py:meth:`bytes.decode` documentation. - :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be - ``'strict'``, ``'replace'`` or ``'ignore'``. - """ - def detect_encoding(): - """ - For the implementation of encoding definitions in Python, look at: - - http://www.python.org/dev/peps/pep-0263/ - - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations - """ - byte_mark = literal_eval(r"b'\xef\xbb\xbf'") - if source.startswith(byte_mark): - # UTF-8 byte-order mark - return 'utf-8' - - first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0) - possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)", - first_two_lines) - if possible_encoding: - return possible_encoding.group(1) - else: - # the default if nothing else has been set -> PEP 263 - return encoding - - if isinstance(source, unicode): - # only cast str/bytes - return source - - encoding = detect_encoding() - if not isinstance(encoding, unicode): - encoding = unicode(encoding, 'utf-8', 'replace') - - # Cast to unicode - return unicode(source, encoding, errors) - - -def version_info(): - """ - Returns a namedtuple of parso's version, similar to Python's - ``sys.version_info``. - """ - from parso import __version__ - tupl = re.findall(r'[a-z]+|\d+', __version__) - return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)]) - - -def _parse_version(version): - match = re.match(r'(\d+)(?:\.(\d)(?:\.\d+)?)?$', version) - if match is None: - raise ValueError('The given version is not in the right format. ' - 'Use something like "3.2" or "3".') - - major = int(match.group(1)) - minor = match.group(2) - if minor is None: - # Use the latest Python in case it's not exactly defined, because the - # grammars are typically backwards compatible? - if major == 2: - minor = "7" - elif major == 3: - minor = "6" - else: - raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.") - minor = int(minor) - return PythonVersionInfo(major, minor) - - -@total_ordering -class PythonVersionInfo(namedtuple('Version', 'major, minor')): - def __gt__(self, other): - if isinstance(other, tuple): - if len(other) != 2: - raise ValueError("Can only compare to tuples of length 2.") - return (self.major, self.minor) > other - super(PythonVersionInfo, self).__gt__(other) - - return (self.major, self.minor) - - def __eq__(self, other): - if isinstance(other, tuple): - if len(other) != 2: - raise ValueError("Can only compare to tuples of length 2.") - return (self.major, self.minor) == other - super(PythonVersionInfo, self).__eq__(other) - - def __ne__(self, other): - return not self.__eq__(other) - - -def parse_version_string(version=None): - """ - Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and - returns a corresponding version info that is always two characters long in - decimal. 
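A few concrete cases for the string helpers above (a sketch; outputs follow the implementations just shown):

from parso.utils import split_lines, python_bytes_to_unicode

print(split_lines('a\nb'))                    # ['a', 'b']
print(split_lines('a\nb\n'))                  # ['a', 'b', '']
print(split_lines('a\fb'))                    # ['a\x0cb']: form feeds are not line breaks here
print(split_lines('a\r\nb', keepends=True))   # ['a\r\n', 'b']

source = b'# -*- coding: latin-1 -*-\nv = "\xe9"\n'
print(python_bytes_to_unicode(source))        # decoded with the declared latin-1 coding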
- """ - if version is None: - version = '%s.%s' % sys.version_info[:2] - if not isinstance(version, (unicode, str)): - raise TypeError("version must be a string like 3.2.") - - return _parse_version(version) From b9eb8a6ec1cd3a90d07af6ffadfa00d3d1830494 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Fri, 1 Jun 2018 21:37:52 -0700 Subject: [PATCH 2/4] Update parso to 0.2.1 --- pythonFiles/parso/__init__.py | 58 ++ pythonFiles/parso/_compatibility.py | 103 ++ pythonFiles/parso/cache.py | 162 ++++ pythonFiles/parso/grammar.py | 250 +++++ pythonFiles/parso/normalizer.py | 184 ++++ pythonFiles/parso/parser.py | 78 ++ pythonFiles/parso/pgen2/__init__.py | 8 + pythonFiles/parso/pgen2/grammar.py | 128 +++ pythonFiles/parso/pgen2/parse.py | 223 +++++ pythonFiles/parso/pgen2/pgen.py | 400 ++++++++ pythonFiles/parso/python/__init__.py | 0 pythonFiles/parso/python/diff.py | 594 ++++++++++++ pythonFiles/parso/python/errors.py | 994 ++++++++++++++++++++ pythonFiles/parso/python/grammar26.txt | 159 ++++ pythonFiles/parso/python/grammar27.txt | 143 +++ pythonFiles/parso/python/grammar33.txt | 134 +++ pythonFiles/parso/python/grammar34.txt | 134 +++ pythonFiles/parso/python/grammar35.txt | 153 +++ pythonFiles/parso/python/grammar36.txt | 157 ++++ pythonFiles/parso/python/grammar37.txt | 157 ++++ pythonFiles/parso/python/parser.py | 265 ++++++ pythonFiles/parso/python/pep8.py | 727 +++++++++++++++ pythonFiles/parso/python/prefix.py | 97 ++ pythonFiles/parso/python/token.py | 113 +++ pythonFiles/parso/python/tokenize.py | 609 ++++++++++++ pythonFiles/parso/python/tree.py | 1194 ++++++++++++++++++++++++ pythonFiles/parso/tree.py | 363 +++++++ pythonFiles/parso/utils.py | 156 ++++ 28 files changed, 7743 insertions(+) create mode 100644 pythonFiles/parso/__init__.py create mode 100644 pythonFiles/parso/_compatibility.py create mode 100644 pythonFiles/parso/cache.py create mode 100644 pythonFiles/parso/grammar.py create mode 100644 pythonFiles/parso/normalizer.py create mode 100644 pythonFiles/parso/parser.py create mode 100644 pythonFiles/parso/pgen2/__init__.py create mode 100644 pythonFiles/parso/pgen2/grammar.py create mode 100644 pythonFiles/parso/pgen2/parse.py create mode 100644 pythonFiles/parso/pgen2/pgen.py create mode 100644 pythonFiles/parso/python/__init__.py create mode 100644 pythonFiles/parso/python/diff.py create mode 100644 pythonFiles/parso/python/errors.py create mode 100644 pythonFiles/parso/python/grammar26.txt create mode 100644 pythonFiles/parso/python/grammar27.txt create mode 100644 pythonFiles/parso/python/grammar33.txt create mode 100644 pythonFiles/parso/python/grammar34.txt create mode 100644 pythonFiles/parso/python/grammar35.txt create mode 100644 pythonFiles/parso/python/grammar36.txt create mode 100644 pythonFiles/parso/python/grammar37.txt create mode 100644 pythonFiles/parso/python/parser.py create mode 100644 pythonFiles/parso/python/pep8.py create mode 100644 pythonFiles/parso/python/prefix.py create mode 100644 pythonFiles/parso/python/token.py create mode 100644 pythonFiles/parso/python/tokenize.py create mode 100644 pythonFiles/parso/python/tree.py create mode 100644 pythonFiles/parso/tree.py create mode 100644 pythonFiles/parso/utils.py diff --git a/pythonFiles/parso/__init__.py b/pythonFiles/parso/__init__.py new file mode 100644 index 000000000000..9654389dea9f --- /dev/null +++ b/pythonFiles/parso/__init__.py @@ -0,0 +1,58 @@ +r""" +Parso is a Python parser that supports error recovery and round-trip parsing +for different Python versions (in multiple Python 
versions). Parso is also able +to list multiple syntax errors in your python file. + +Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful +for other projects as well. + +Parso consists of a small API to parse Python and analyse the syntax tree. + +.. _jedi: https://github.com/davidhalter/jedi + +A simple example: + +>>> import parso +>>> module = parso.parse('hello + 1', version="3.6") +>>> expr = module.children[0] +>>> expr +PythonNode(arith_expr, [, , ]) +>>> print(expr.get_code()) +hello + 1 +>>> name = expr.children[0] +>>> name + +>>> name.end_pos +(1, 5) +>>> expr.end_pos +(1, 9) + +To list multiple issues: + +>>> grammar = parso.load_grammar() +>>> module = grammar.parse('foo +\nbar\ncontinue') +>>> error1, error2 = grammar.iter_errors(module) +>>> error1.message +'SyntaxError: invalid syntax' +>>> error2.message +"SyntaxError: 'continue' not properly in loop" +""" + +from parso.parser import ParserSyntaxError +from parso.grammar import Grammar, load_grammar +from parso.utils import split_lines, python_bytes_to_unicode + + +__version__ = '0.2.1' + + +def parse(code=None, **kwargs): + """ + A utility function to avoid loading grammars. + Params are documented in :py:meth:`parso.Grammar.parse`. + + :param str version: The version used by :py:func:`parso.load_grammar`. + """ + version = kwargs.pop('version', None) + grammar = load_grammar(version=version) + return grammar.parse(code, **kwargs) diff --git a/pythonFiles/parso/_compatibility.py b/pythonFiles/parso/_compatibility.py new file mode 100644 index 000000000000..db411eebf981 --- /dev/null +++ b/pythonFiles/parso/_compatibility.py @@ -0,0 +1,103 @@ +""" +To ensure compatibility from Python ``2.6`` - ``3.3``, a module has been +created. Clearly there is huge need to use conforming syntax. +""" +import sys +import platform + +# Cannot use sys.version.major and minor names, because in Python 2.6 it's not +# a namedtuple. +py_version = int(str(sys.version_info[0]) + str(sys.version_info[1])) + +# unicode function +try: + unicode = unicode +except NameError: + unicode = str + +is_pypy = platform.python_implementation() == 'PyPy' + + +def use_metaclass(meta, *bases): + """ Create a class with a metaclass. """ + if not bases: + bases = (object,) + return meta("HackClass", bases, {}) + + +try: + encoding = sys.stdout.encoding + if encoding is None: + encoding = 'utf-8' +except AttributeError: + encoding = 'ascii' + + +def u(string): + """Cast to unicode DAMMIT! + Written because Python2 repr always implicitly casts to a string, so we + have to cast back to a unicode (and we know that we always deal with valid + unicode, because we check that in the beginning). + """ + if py_version >= 30: + return str(string) + + if not isinstance(string, unicode): + return unicode(str(string), 'UTF-8') + return string + + +try: + FileNotFoundError = FileNotFoundError +except NameError: + FileNotFoundError = IOError + + +def utf8_repr(func): + """ + ``__repr__`` methods in Python 2 don't allow unicode objects to be + returned. Therefore cast them to utf-8 bytes in this decorator. 
+ """ + def wrapper(self): + result = func(self) + if isinstance(result, unicode): + return result.encode('utf-8') + else: + return result + + if py_version >= 30: + return func + else: + return wrapper + + +try: + from functools import total_ordering +except ImportError: + # Python 2.6 + def total_ordering(cls): + """Class decorator that fills in missing ordering methods""" + convert = { + '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)), + ('__le__', lambda self, other: self < other or self == other), + ('__ge__', lambda self, other: not self < other)], + '__le__': [('__ge__', lambda self, other: not self <= other or self == other), + ('__lt__', lambda self, other: self <= other and not self == other), + ('__gt__', lambda self, other: not self <= other)], + '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)), + ('__ge__', lambda self, other: self > other or self == other), + ('__le__', lambda self, other: not self > other)], + '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other), + ('__gt__', lambda self, other: self >= other and not self == other), + ('__lt__', lambda self, other: not self >= other)] + } + roots = set(dir(cls)) & set(convert) + if not roots: + raise ValueError('must define at least one ordering operation: < > <= >=') + root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__ + for opname, opfunc in convert[root]: + if opname not in roots: + opfunc.__name__ = opname + opfunc.__doc__ = getattr(int, opname).__doc__ + setattr(cls, opname, opfunc) + return cls diff --git a/pythonFiles/parso/cache.py b/pythonFiles/parso/cache.py new file mode 100644 index 000000000000..d0465d023086 --- /dev/null +++ b/pythonFiles/parso/cache.py @@ -0,0 +1,162 @@ +import time +import os +import sys +import hashlib +import gc +import shutil +import platform +import errno +import logging + +try: + import cPickle as pickle +except: + import pickle + +from parso._compatibility import FileNotFoundError + +LOG = logging.getLogger(__name__) + + +_PICKLE_VERSION = 30 +""" +Version number (integer) for file system cache. + +Increment this number when there are any incompatible changes in +the parser tree classes. For example, the following changes +are regarded as incompatible. + +- A class name is changed. +- A class is moved to another module. +- A __slot__ of a class is changed. +""" + +_VERSION_TAG = '%s-%s%s-%s' % ( + platform.python_implementation(), + sys.version_info[0], + sys.version_info[1], + _PICKLE_VERSION +) +""" +Short name for distinguish Python implementations and versions. + +It's like `sys.implementation.cache_tag` but for Python < 3.3 +we generate something similar. See: +http://docs.python.org/3/library/sys.html#sys.implementation +""" + +def _get_default_cache_path(): + if platform.system().lower() == 'windows': + dir_ = os.path.join(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso') + elif platform.system().lower() == 'darwin': + dir_ = os.path.join('~', 'Library', 'Caches', 'Parso') + else: + dir_ = os.path.join(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso') + return os.path.expanduser(dir_) + +_default_cache_path = _get_default_cache_path() +""" +The path where the cache is stored. + +On Linux, this defaults to ``~/.cache/parso/``, on OS X to +``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``. +On Linux, if environment variable ``$XDG_CACHE_HOME`` is set, +``$XDG_CACHE_HOME/parso`` is used instead of the default one. 
+""" + +parser_cache = {} + + +class _NodeCacheItem(object): + def __init__(self, node, lines, change_time=None): + self.node = node + self.lines = lines + if change_time is None: + change_time = time.time() + self.change_time = change_time + + +def load_module(hashed_grammar, path, cache_path=None): + """ + Returns a module or None, if it fails. + """ + try: + p_time = os.path.getmtime(path) + except FileNotFoundError: + return None + + try: + module_cache_item = parser_cache[hashed_grammar][path] + if p_time <= module_cache_item.change_time: + return module_cache_item.node + except KeyError: + return _load_from_file_system(hashed_grammar, path, p_time, cache_path=cache_path) + + +def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None): + cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path) + try: + try: + if p_time > os.path.getmtime(cache_path): + # Cache is outdated + return None + except OSError as e: + if e.errno == errno.ENOENT: + # In Python 2 instead of an IOError here we get an OSError. + raise FileNotFoundError + else: + raise + + with open(cache_path, 'rb') as f: + gc.disable() + try: + module_cache_item = pickle.load(f) + finally: + gc.enable() + except FileNotFoundError: + return None + else: + parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item + LOG.debug('pickle loaded: %s', path) + return module_cache_item.node + + +def save_module(hashed_grammar, path, module, lines, pickling=True, cache_path=None): + try: + p_time = None if path is None else os.path.getmtime(path) + except OSError: + p_time = None + pickling = False + + item = _NodeCacheItem(module, lines, p_time) + parser_cache.setdefault(hashed_grammar, {})[path] = item + if pickling and path is not None: + _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path) + + +def _save_to_file_system(hashed_grammar, path, item, cache_path=None): + with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f: + pickle.dump(item, f, pickle.HIGHEST_PROTOCOL) + + +def clear_cache(cache_path=None): + if cache_path is None: + cache_path = _default_cache_path + shutil.rmtree(cache_path) + parser_cache.clear() + + +def _get_hashed_path(hashed_grammar, path, cache_path=None): + directory = _get_cache_directory_path(cache_path=cache_path) + + file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest() + return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash)) + + +def _get_cache_directory_path(cache_path=None): + if cache_path is None: + cache_path = _default_cache_path + directory = os.path.join(cache_path, _VERSION_TAG) + if not os.path.exists(directory): + os.makedirs(directory) + return directory diff --git a/pythonFiles/parso/grammar.py b/pythonFiles/parso/grammar.py new file mode 100644 index 000000000000..6c13f002f90d --- /dev/null +++ b/pythonFiles/parso/grammar.py @@ -0,0 +1,250 @@ +import hashlib +import os + +from parso._compatibility import FileNotFoundError, is_pypy +from parso.pgen2.pgen import generate_grammar +from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string +from parso.python.diff import DiffParser +from parso.python.tokenize import tokenize_lines, tokenize +from parso.python import token +from parso.cache import parser_cache, load_module, save_module +from parso.parser import BaseParser +from parso.python.parser import Parser as PythonParser +from parso.python.errors import ErrorFinderConfig +from parso.python import pep8 + +_loaded_grammars = {} + + +class 
Grammar(object): + """ + :py:func:`parso.load_grammar` returns instances of this class. + + Creating custom none-python grammars by calling this is not supported, yet. + """ + #:param text: A BNF representation of your grammar. + _error_normalizer_config = None + _token_namespace = None + _default_normalizer_config = pep8.PEP8NormalizerConfig() + + def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None): + self._pgen_grammar = generate_grammar( + text, + token_namespace=self._get_token_namespace() + ) + self._parser = parser + self._tokenizer = tokenizer + self._diff_parser = diff_parser + self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest() + + def parse(self, code=None, **kwargs): + """ + If you want to parse a Python file you want to start here, most likely. + + If you need finer grained control over the parsed instance, there will be + other ways to access it. + + :param str code: A unicode or bytes string. When it's not possible to + decode bytes to a string, returns a + :py:class:`UnicodeDecodeError`. + :param bool error_recovery: If enabled, any code will be returned. If + it is invalid, it will be returned as an error node. If disabled, + you will get a ParseError when encountering syntax errors in your + code. + :param str start_symbol: The grammar symbol that you want to parse. Only + allowed to be used when error_recovery is False. + :param str path: The path to the file you want to open. Only needed for caching. + :param bool cache: Keeps a copy of the parser tree in RAM and on disk + if a path is given. Returns the cached trees if the corresponding + files on disk have not changed. + :param bool diff_cache: Diffs the cached python module against the new + code and tries to parse only the parts that have changed. Returns + the same (changed) module that is found in cache. Using this option + requires you to not do anything anymore with the cached modules + under that path, because the contents of it might change. This + option is still somewhat experimental. If you want stability, + please don't use it. + :param bool cache_path: If given saves the parso cache in this + directory. If not given, defaults to the default cache places on + each platform. + + :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a + :py:class:`parso.python.tree.Module`. + """ + if 'start_pos' in kwargs: + raise TypeError("parse() got an unexpected keyword argument.") + return self._parse(code=code, **kwargs) + + def _parse(self, code=None, error_recovery=True, path=None, + start_symbol=None, cache=False, diff_cache=False, + cache_path=None, start_pos=(1, 0)): + """ + Wanted python3.5 * operator and keyword only arguments. Therefore just + wrap it all. + start_pos here is just a parameter internally used. Might be public + sometime in the future. 
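A usage sketch for the caching options documented above (assuming parso 0.2.x; 'example.py' is a hypothetical scratch file created just for the example):

import parso

with open('example.py', 'w') as f:            # hypothetical scratch file
    f.write('x = 1\n')

grammar = parso.load_grammar(version='3.6')
# cache=True pickles the tree to the cache directory; an unchanged file is
# later served from that cache.  diff_cache=True additionally reparses only
# the lines that changed in an edited file.
module = grammar.parse(path='example.py', cache=True)
module = grammar.parse(path='example.py', cache=True, diff_cache=True)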
+ """ + if code is None and path is None: + raise TypeError("Please provide either code or a path.") + + if start_symbol is None: + start_symbol = self._start_symbol + + if error_recovery and start_symbol != 'file_input': + raise NotImplementedError("This is currently not implemented.") + + if cache and path is not None: + module_node = load_module(self._hashed, path, cache_path=cache_path) + if module_node is not None: + return module_node + + if code is None: + with open(path, 'rb') as f: + code = f.read() + + code = python_bytes_to_unicode(code) + + lines = split_lines(code, keepends=True) + if diff_cache: + if self._diff_parser is None: + raise TypeError("You have to define a diff parser to be able " + "to use this option.") + try: + module_cache_item = parser_cache[self._hashed][path] + except KeyError: + pass + else: + module_node = module_cache_item.node + old_lines = module_cache_item.lines + if old_lines == lines: + return module_node + + new_node = self._diff_parser( + self._pgen_grammar, self._tokenizer, module_node + ).update( + old_lines=old_lines, + new_lines=lines + ) + save_module(self._hashed, path, new_node, lines, + # Never pickle in pypy, it's slow as hell. + pickling=cache and not is_pypy, + cache_path=cache_path) + return new_node + + tokens = self._tokenizer(lines, start_pos) + + p = self._parser( + self._pgen_grammar, + error_recovery=error_recovery, + start_symbol=start_symbol + ) + root_node = p.parse(tokens=tokens) + + if cache or diff_cache: + save_module(self._hashed, path, root_node, lines, + # Never pickle in pypy, it's slow as hell. + pickling=cache and not is_pypy, + cache_path=cache_path) + return root_node + + def _get_token_namespace(self): + ns = self._token_namespace + if ns is None: + raise ValueError("The token namespace should be set.") + return ns + + def iter_errors(self, node): + """ + Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of + :py:class:`parso.normalizer.Issue` objects. For Python this is + a list of syntax/indentation errors. + """ + if self._error_normalizer_config is None: + raise ValueError("No error normalizer specified for this grammar.") + + return self._get_normalizer_issues(node, self._error_normalizer_config) + + def _get_normalizer(self, normalizer_config): + if normalizer_config is None: + normalizer_config = self._default_normalizer_config + if normalizer_config is None: + raise ValueError("You need to specify a normalizer, because " + "there's no default normalizer for this tree.") + return normalizer_config.create_normalizer(self) + + def _normalize(self, node, normalizer_config=None): + """ + TODO this is not public, yet. + The returned code will be normalized, e.g. PEP8 for Python. + """ + normalizer = self._get_normalizer(normalizer_config) + return normalizer.walk(node) + + def _get_normalizer_issues(self, node, normalizer_config=None): + normalizer = self._get_normalizer(normalizer_config) + normalizer.walk(node) + return normalizer.issues + + def __repr__(self): + labels = self._pgen_grammar.number2symbol.values() + txt = ' '.join(list(labels)[:3]) + ' ...' 
+ return '<%s:%s>' % (self.__class__.__name__, txt) + + +class PythonGrammar(Grammar): + _error_normalizer_config = ErrorFinderConfig() + _token_namespace = token + _start_symbol = 'file_input' + + def __init__(self, version_info, bnf_text): + super(PythonGrammar, self).__init__( + bnf_text, + tokenizer=self._tokenize_lines, + parser=PythonParser, + diff_parser=DiffParser + ) + self.version_info = version_info + + def _tokenize_lines(self, lines, start_pos): + return tokenize_lines(lines, self.version_info, start_pos=start_pos) + + def _tokenize(self, code): + # Used by Jedi. + return tokenize(code, self.version_info) + + +def load_grammar(**kwargs): + """ + Loads a :py:class:`parso.Grammar`. The default version is the current Python + version. + + :param str version: A python version string, e.g. ``version='3.3'``. + :param str path: A path to a grammar file + """ + def load_grammar(language='python', version=None, path=None): + if language == 'python': + version_info = parse_version_string(version) + + file = path or os.path.join( + 'python', + 'grammar%s%s.txt' % (version_info.major, version_info.minor) + ) + + global _loaded_grammars + path = os.path.join(os.path.dirname(__file__), file) + try: + return _loaded_grammars[path] + except KeyError: + try: + with open(path) as f: + bnf_text = f.read() + + grammar = PythonGrammar(version_info, bnf_text) + return _loaded_grammars.setdefault(path, grammar) + except FileNotFoundError: + message = "Python version %s is currently not supported." % version + raise NotImplementedError(message) + else: + raise NotImplementedError("No support for language %s." % language) + + return load_grammar(**kwargs) diff --git a/pythonFiles/parso/normalizer.py b/pythonFiles/parso/normalizer.py new file mode 100644 index 000000000000..9a3e82e24c87 --- /dev/null +++ b/pythonFiles/parso/normalizer.py @@ -0,0 +1,184 @@ +from contextlib import contextmanager + +from parso._compatibility import use_metaclass + + +class _NormalizerMeta(type): + def __new__(cls, name, bases, dct): + new_cls = type.__new__(cls, name, bases, dct) + new_cls.rule_value_classes = {} + new_cls.rule_type_classes = {} + return new_cls + + +class Normalizer(use_metaclass(_NormalizerMeta)): + def __init__(self, grammar, config): + self.grammar = grammar + self._config = config + self.issues = [] + + self._rule_type_instances = self._instantiate_rules('rule_type_classes') + self._rule_value_instances = self._instantiate_rules('rule_value_classes') + + def _instantiate_rules(self, attr): + dct = {} + for base in type(self).mro(): + rules_map = getattr(base, attr, {}) + for type_, rule_classes in rules_map.items(): + new = [rule_cls(self) for rule_cls in rule_classes] + dct.setdefault(type_, []).extend(new) + return dct + + def walk(self, node): + self.initialize(node) + value = self.visit(node) + self.finalize() + return value + + def visit(self, node): + try: + children = node.children + except AttributeError: + return self.visit_leaf(node) + else: + with self.visit_node(node): + return ''.join(self.visit(child) for child in children) + + @contextmanager + def visit_node(self, node): + self._check_type_rules(node) + yield + + def _check_type_rules(self, node): + for rule in self._rule_type_instances.get(node.type, []): + rule.feed_node(node) + + def visit_leaf(self, leaf): + self._check_type_rules(leaf) + + for rule in self._rule_value_instances.get(leaf.value, []): + rule.feed_node(leaf) + + return leaf.prefix + leaf.value + + def initialize(self, node): + pass + + def finalize(self): + pass + + 
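load_grammar resolves the bundled grammar file from the version string; unsupported versions surface as NotImplementedError (a sketch):

import parso

grammar = parso.load_grammar(version='3.6')   # loads python/grammar36.txt
try:
    parso.load_grammar(version='5.0')         # no grammar50.txt is shipped
except NotImplementedError as exc:
    print(exc)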
def add_issue(self, node, code, message): + issue = Issue(node, code, message) + if issue not in self.issues: + self.issues.append(issue) + return True + + @classmethod + def register_rule(cls, **kwargs): + """ + Use it as a class decorator:: + + normalizer = Normalizer('grammar', 'config') + @normalizer.register_rule(value='foo') + class MyRule(Rule): + error_code = 42 + """ + return cls._register_rule(**kwargs) + + @classmethod + def _register_rule(cls, value=None, values=(), type=None, types=()): + values = list(values) + types = list(types) + if value is not None: + values.append(value) + if type is not None: + types.append(type) + + if not values and not types: + raise ValueError("You must register at least something.") + + def decorator(rule_cls): + for v in values: + cls.rule_value_classes.setdefault(v, []).append(rule_cls) + for t in types: + cls.rule_type_classes.setdefault(t, []).append(rule_cls) + return rule_cls + + return decorator + + +class NormalizerConfig(object): + normalizer_class = Normalizer + + def create_normalizer(self, grammar): + if self.normalizer_class is None: + return None + + return self.normalizer_class(grammar, self) + + +class Issue(object): + def __init__(self, node, code, message): + self._node = node + self.code = code + """ + An integer code that stands for the type of error. + """ + self.message = message + """ + A message (string) for the issue. + """ + self.start_pos = node.start_pos + """ + The start position position of the error as a tuple (line, column). As + always in |parso| the first line is 1 and the first column 0. + """ + + def __eq__(self, other): + return self.start_pos == other.start_pos and self.code == other.code + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((self.code, self.start_pos)) + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.code) + + + +class Rule(object): + code = None + message = None + + def __init__(self, normalizer): + self._normalizer = normalizer + + def is_issue(self, node): + raise NotImplementedError() + + def get_node(self, node): + return node + + def _get_message(self, message): + if message is None: + message = self.message + if message is None: + raise ValueError("The message on the class is not set.") + return message + + def add_issue(self, node, code=None, message=None): + if code is None: + code = self.code + if code is None: + raise ValueError("The error code on the class is not set.") + + message = self._get_message(message) + + self._normalizer.add_issue(node, code, message) + + def feed_node(self, node): + if self.is_issue(node): + issue_node = self.get_node(node) + self.add_issue(issue_node) diff --git a/pythonFiles/parso/parser.py b/pythonFiles/parso/parser.py new file mode 100644 index 000000000000..555ebc712f73 --- /dev/null +++ b/pythonFiles/parso/parser.py @@ -0,0 +1,78 @@ +""" +The ``Parser`` tries to convert the available Python code in an easy to read +format, something like an abstract syntax tree. The classes who represent this +tree, are sitting in the :mod:`parso.tree` module. + +The Python module ``tokenize`` is a very important part in the ``Parser``, +because it splits the code into different words (tokens). Sometimes it looks a +bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast`` +module for this? Well, ``ast`` does a very good job understanding proper Python +code, but fails to work as soon as there's a single line of broken code. 
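The normalizer machinery above can also drive small lint-style rules directly; a minimal sketch (the rule, its code and its message are made up for illustration):

import parso
from parso.normalizer import Normalizer, Rule

class MyNormalizer(Normalizer):
    pass

@MyNormalizer.register_rule(value='import')
class NoImports(Rule):
    code = 999                                   # made-up issue code
    message = 'import statements are flagged in this sketch'

    def is_issue(self, leaf):
        return True                              # flag every `import` keyword leaf

grammar = parso.load_grammar(version='3.6')
module = grammar.parse('import os\n')
normalizer = MyNormalizer(grammar, None)         # the base class only stores the config
normalizer.walk(module)
print([(i.code, i.start_pos) for i in normalizer.issues])   # [(999, (1, 0))]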
+ +There's one important optimization that needs to be known: Statements are not +being parsed completely. ``Statement`` is just a representation of the tokens +within the statement. This lowers memory usage and cpu time and reduces the +complexity of the ``Parser`` (there's another parser sitting inside +``Statement``, which produces ``Array`` and ``Call``). +""" +from parso import tree +from parso.pgen2.parse import PgenParser + + +class ParserSyntaxError(Exception): + """ + Contains error information about the parser tree. + + May be raised as an exception. + """ + def __init__(self, message, error_leaf): + self.message = message + self.error_leaf = error_leaf + + +class BaseParser(object): + node_map = {} + default_node = tree.Node + + leaf_map = { + } + default_leaf = tree.Leaf + + def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False): + self._pgen_grammar = pgen_grammar + self._start_symbol = start_symbol + self._error_recovery = error_recovery + + def parse(self, tokens): + start_number = self._pgen_grammar.symbol2number[self._start_symbol] + self.pgen_parser = PgenParser( + self._pgen_grammar, self.convert_node, self.convert_leaf, + self.error_recovery, start_number + ) + + node = self.pgen_parser.parse(tokens) + # The stack is empty now, we don't need it anymore. + del self.pgen_parser + return node + + def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix, + add_token_callback): + if self._error_recovery: + raise NotImplementedError("Error Recovery is not implemented") + else: + error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix) + raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf) + + def convert_node(self, pgen_grammar, type_, children): + # TODO REMOVE symbol, we don't want type here. + symbol = pgen_grammar.number2symbol[type_] + try: + return self.node_map[symbol](children) + except KeyError: + return self.default_node(symbol, children) + + def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos): + try: + return self.leaf_map[type_](value, start_pos, prefix) + except KeyError: + return self.default_leaf(value, start_pos, prefix) diff --git a/pythonFiles/parso/pgen2/__init__.py b/pythonFiles/parso/pgen2/__init__.py new file mode 100644 index 000000000000..1ddae5fea9f7 --- /dev/null +++ b/pythonFiles/parso/pgen2/__init__.py @@ -0,0 +1,8 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +# Copyright 2014 David Halter. Integration into Jedi. +# Modifications are dual-licensed: MIT and PSF. diff --git a/pythonFiles/parso/pgen2/grammar.py b/pythonFiles/parso/pgen2/grammar.py new file mode 100644 index 000000000000..e5f211426fad --- /dev/null +++ b/pythonFiles/parso/pgen2/grammar.py @@ -0,0 +1,128 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright 2014 David Halter. Integration into Jedi. +# Modifications are dual-licensed: MIT and PSF. + +"""This module defines the data structures used to represent a grammar. + +These are a bit arcane because they are derived from the data +structures used by Python's 'pgen' parser generator. 
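Error recovery versus hard failure, as implemented above (a sketch):

import parso
from parso import ParserSyntaxError

grammar = parso.load_grammar(version='3.6')
module = grammar.parse('def f(:\n    pass\n')          # recovered tree containing error nodes
try:
    grammar.parse('def f(:\n    pass\n', error_recovery=False)
except ParserSyntaxError as exc:
    print(exc.message, exc.error_leaf.start_pos)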
+ +There's also a table here mapping operators to their names in the +token module; the Python tokenize module reports all operators as the +fallback token code OP, but the parser needs the actual token code. + +""" + +try: + import cPickle as pickle +except: + import pickle + + +class Grammar(object): + """Pgen parsing tables conversion class. + + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. The class here does not + provide initialization of the tables; several subclasses exist to + do this (see the conv and pgen modules). + + The load() method reads the tables from a pickle file, which is + much faster than the other ways offered by subclasses. The pickle + file is written by calling dump() (after loading the grammar + tables using a subclass). The report() method prints a readable + representation of the tables to stdout, for debugging. + + The instance variables are as follows: + + symbol2number -- a dict mapping symbol names to numbers. Symbol + numbers are always 256 or higher, to distinguish + them from token numbers, which are between 0 and + 255 (inclusive). + + number2symbol -- a dict mapping numbers to symbol names; + these two are each other's inverse. + + states -- a list of DFAs, where each DFA is a list of + states, each state is a list of arcs, and each + arc is a (i, j) pair where i is a label and j is + a state number. The DFA number is the index into + this list. (This name is slightly confusing.) + Final states are represented by a special arc of + the form (0, j) where j is its own state number. + + dfas -- a dict mapping symbol numbers to (DFA, first) + pairs, where DFA is an item from the states list + above, and first is a set of tokens that can + begin this grammar rule (represented by a dict + whose values are always 1). + + labels -- a list of (x, y) pairs where x is either a token + number or a symbol number, and y is either None + or a string; the strings are keywords. The label + number is the index in this list; label numbers + are used to mark state transitions (arcs) in the + DFAs. + + start -- the number of the grammar's start symbol. + + keywords -- a dict mapping keyword strings to arc labels. + + tokens -- a dict mapping token numbers to arc labels. + + """ + + def __init__(self, bnf_text): + self.symbol2number = {} + self.number2symbol = {} + self.states = [] + self.dfas = {} + self.labels = [(0, "EMPTY")] + self.keywords = {} + self.tokens = {} + self.symbol2label = {} + self.label2symbol = {} + self.start = 256 + + def dump(self, filename): + """Dump the grammar tables to a pickle file.""" + with open(filename, "wb") as f: + pickle.dump(self.__dict__, f, 2) + + def load(self, filename): + """Load the grammar tables from a pickle file.""" + with open(filename, "rb") as f: + d = pickle.load(f) + self.__dict__.update(d) + + def copy(self): + """ + Copy the grammar. 
+ """ + new = self.__class__() + for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", + "tokens", "symbol2label"): + setattr(new, dict_attr, getattr(self, dict_attr).copy()) + new.labels = self.labels[:] + new.states = self.states[:] + new.start = self.start + return new + + def report(self): + """Dump the grammar tables to standard output, for debugging.""" + from pprint import pprint + print("s2n") + pprint(self.symbol2number) + print("n2s") + pprint(self.number2symbol) + print("states") + pprint(self.states) + print("dfas") + pprint(self.dfas) + print("labels") + pprint(self.labels) + print("start", self.start) diff --git a/pythonFiles/parso/pgen2/parse.py b/pythonFiles/parso/pgen2/parse.py new file mode 100644 index 000000000000..aaacfcebe44e --- /dev/null +++ b/pythonFiles/parso/pgen2/parse.py @@ -0,0 +1,223 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright 2014 David Halter. Integration into Jedi. +# Modifications are dual-licensed: MIT and PSF. + +""" +Parser engine for the grammar tables generated by pgen. + +The grammar table must be loaded first. + +See Parser/parser.c in the Python distribution for additional info on +how this parsing engine works. +""" + +from parso.python import tokenize + + +class InternalParseError(Exception): + """ + Exception to signal the parser is stuck and error recovery didn't help. + Basically this shouldn't happen. It's a sign that something is really + wrong. + """ + + def __init__(self, msg, type, value, start_pos): + Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % + (msg, tokenize.tok_name[type], value, start_pos)) + self.msg = msg + self.type = type + self.value = value + self.start_pos = start_pos + + +class Stack(list): + def get_tos_nodes(self): + tos = self[-1] + return tos[2][1] + + +def token_to_ilabel(grammar, type_, value): + # Map from token to label + if type_ == tokenize.NAME: + # Check for reserved words (keywords) + try: + return grammar.keywords[value] + except KeyError: + pass + + try: + return grammar.tokens[type_] + except KeyError: + return None + + +class PgenParser(object): + """Parser engine. + + The proper usage sequence is: + + p = Parser(grammar, [converter]) # create instance + p.setup([start]) # prepare for parsing + : + if p.add_token(...): # parse a token + break + root = p.rootnode # root of abstract syntax tree + + A Parser instance may be reused by calling setup() repeatedly. + + A Parser instance contains state pertaining to the current token + sequence, and should not be used concurrently by different threads + to parse separate token sequences. + + See driver.py for how to get input tokens by tokenizing a file or + string. + + Parsing is complete when add_token() returns True; the root of the + abstract syntax tree can then be retrieved from the rootnode + instance variable. When a syntax error occurs, error_recovery() + is called. There is no error recovery; the parser cannot be used + after a syntax error was reported (but it can be reinitialized by + calling setup()). + + """ + + def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start): + """Constructor. + + The grammar argument is a grammar.Grammar instance; see the + grammar module for more information. + + The parser is not ready yet for parsing; you must call the + setup() method to get it started. 
+ + The optional convert argument is a function mapping concrete + syntax tree nodes to abstract syntax tree nodes. If not + given, no conversion is done and the syntax tree produced is + the concrete syntax tree. If given, it must be a function of + two arguments, the first being the grammar (a grammar.Grammar + instance), and the second being the concrete syntax tree node + to be converted. The syntax tree is converted from the bottom + up. + + A concrete syntax tree node is a (type, nodes) tuple, where + type is the node type (a token or symbol number) and nodes + is a list of children for symbols, and None for tokens. + + An abstract syntax tree node may be anything; this is entirely + up to the converter function. + + """ + self.grammar = grammar + self.convert_node = convert_node + self.convert_leaf = convert_leaf + + # Each stack entry is a tuple: (dfa, state, node). + # A node is a tuple: (type, children), + # where children is a list of nodes or None + newnode = (start, []) + stackentry = (self.grammar.dfas[start], 0, newnode) + self.stack = Stack([stackentry]) + self.rootnode = None + self.error_recovery = error_recovery + + def parse(self, tokens): + for type_, value, start_pos, prefix in tokens: + if self.add_token(type_, value, start_pos, prefix): + break + else: + # We never broke out -- EOF is too soon -- Unfinished statement. + # However, the error recovery might have added the token again, if + # the stack is empty, we're fine. + if self.stack: + raise InternalParseError("incomplete input", type_, value, start_pos) + return self.rootnode + + def add_token(self, type_, value, start_pos, prefix): + """Add a token; return True if this is the end of the program.""" + ilabel = token_to_ilabel(self.grammar, type_, value) + + # Loop until the token is shifted; may raise exceptions + _gram = self.grammar + _labels = _gram.labels + _push = self._push + _pop = self._pop + _shift = self._shift + while True: + dfa, state, node = self.stack[-1] + states, first = dfa + arcs = states[state] + # Look for a state with this label + for i, newstate in arcs: + t, v = _labels[i] + if ilabel == i: + # Look it up in the list of labels + assert t < 256 + # Shift a token; we're done with it + _shift(type_, value, newstate, prefix, start_pos) + # Pop while we are in an accept-only state + state = newstate + while states[state] == [(0, state)]: + _pop() + if not self.stack: + # Done parsing! + return True + dfa, state, node = self.stack[-1] + states, first = dfa + # Done with this token + return False + elif t >= 256: + # See if it's a symbol and if we're in its first set + itsdfa = _gram.dfas[t] + itsstates, itsfirst = itsdfa + if ilabel in itsfirst: + # Push a symbol + _push(t, itsdfa, newstate) + break # To continue the outer while loop + else: + if (0, state) in arcs: + # An accepting state, pop it and try something else + _pop() + if not self.stack: + # Done parsing, but another token is input + raise InternalParseError("too much input", type_, value, start_pos) + else: + self.error_recovery(self.grammar, self.stack, arcs, type_, + value, start_pos, prefix, self.add_token) + break + + def _shift(self, type_, value, newstate, prefix, start_pos): + """Shift a token. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) + node[-1].append(newnode) + self.stack[-1] = (dfa, newstate, node) + + def _push(self, type_, newdfa, newstate): + """Push a nonterminal. 
(Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type_, []) + self.stack[-1] = (dfa, newstate, node) + self.stack.append((newdfa, 0, newnode)) + + def _pop(self): + """Pop a nonterminal. (Internal)""" + popdfa, popstate, (type_, children) = self.stack.pop() + # If there's exactly one child, return that child instead of creating a + # new node. We still create expr_stmt and file_input though, because a + # lot of Jedi depends on its logic. + if len(children) == 1: + newnode = children[0] + else: + newnode = self.convert_node(self.grammar, type_, children) + + try: + # Equal to: + # dfa, state, node = self.stack[-1] + # symbol, children = node + self.stack[-1][2][1].append(newnode) + except IndexError: + # Stack is empty, set the rootnode. + self.rootnode = newnode diff --git a/pythonFiles/parso/pgen2/pgen.py b/pythonFiles/parso/pgen2/pgen.py new file mode 100644 index 000000000000..a3e39fa5fe74 --- /dev/null +++ b/pythonFiles/parso/pgen2/pgen.py @@ -0,0 +1,400 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright 2014 David Halter. Integration into Jedi. +# Modifications are dual-licensed: MIT and PSF. + +from parso.pgen2 import grammar +from parso.python import token +from parso.python import tokenize +from parso.utils import parse_version_string + + +class ParserGenerator(object): + def __init__(self, bnf_text, token_namespace): + self._bnf_text = bnf_text + self.generator = tokenize.tokenize( + bnf_text, + version_info=parse_version_string('3.6') + ) + self._gettoken() # Initialize lookahead + self.dfas, self.startsymbol = self._parse() + self.first = {} # map from symbol name to set of tokens + self._addfirstsets() + self._token_namespace = token_namespace + + def make_grammar(self): + c = grammar.Grammar(self._bnf_text) + names = list(self.dfas.keys()) + names.sort() + # TODO do we still need this? + names.remove(self.startsymbol) + names.insert(0, self.startsymbol) + for name in names: + i = 256 + len(c.symbol2number) + c.symbol2number[name] = i + c.number2symbol[i] = name + for name in names: + dfa = self.dfas[name] + states = [] + for state in dfa: + arcs = [] + for label, next in state.arcs.items(): + arcs.append((self._make_label(c, label), dfa.index(next))) + if state.isfinal: + arcs.append((0, dfa.index(state))) + states.append(arcs) + c.states.append(states) + c.dfas[c.symbol2number[name]] = (states, self._make_first(c, name)) + c.start = c.symbol2number[self.startsymbol] + return c + + def _make_first(self, c, name): + rawfirst = self.first[name] + first = {} + for label in rawfirst: + ilabel = self._make_label(c, label) + ##assert ilabel not in first # XXX failed on <> ... != + first[ilabel] = 1 + return first + + def _make_label(self, c, label): + # XXX Maybe this should be a method on a subclass of converter? 
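# [Editor's note -- illustrative sketch, not part of this patch.] The numbering
# convention used by make_grammar()/_make_label() above and by add_token() in
# parse.py is: nonterminal symbols get ids starting at 256, so anything below
# 256 can be treated as a terminal/token id. A plain-dict sketch of that scheme
# (the names here are made up, not parso APIs):
symbol2number = {}
number2symbol = {}

def add_symbol(name):
    # Mirrors make_grammar(): the first rule gets 256, the next 257, ...
    number = 256 + len(symbol2number)
    symbol2number[name] = number
    number2symbol[number] = name
    return number

for rule in ("file_input", "simple_stmt", "expr_stmt"):
    add_symbol(rule)

assert symbol2number["file_input"] == 256
assert all(n >= 256 for n in number2symbol)  # token ids stay below 256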
+ ilabel = len(c.labels) + if label[0].isalpha(): + # Either a symbol name or a named token + if label in c.symbol2number: + # A symbol name (a non-terminal) + if label in c.symbol2label: + return c.symbol2label[label] + else: + c.labels.append((c.symbol2number[label], None)) + c.symbol2label[label] = ilabel + c.label2symbol[ilabel] = label + return ilabel + else: + # A named token (NAME, NUMBER, STRING) + itoken = getattr(self._token_namespace, label, None) + assert isinstance(itoken, int), label + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + value = eval(label) + if value[0].isalpha(): + # A keyword + if value in c.keywords: + return c.keywords[value] + else: + # TODO this might be an issue?! Using token.NAME here? + c.labels.append((token.NAME, value)) + c.keywords[value] = ilabel + return ilabel + else: + # An operator (any non-numeric token) + itoken = self._token_namespace.generate_token_id(value) + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + + def _addfirstsets(self): + names = list(self.dfas.keys()) + names.sort() + for name in names: + if name not in self.first: + self._calcfirst(name) + #print name, self.first[name].keys() + + def _calcfirst(self, name): + dfa = self.dfas[name] + self.first[name] = None # dummy to detect left recursion + state = dfa[0] + totalset = {} + overlapcheck = {} + for label, next in state.arcs.items(): + if label in self.dfas: + if label in self.first: + fset = self.first[label] + if fset is None: + raise ValueError("recursion for rule %r" % name) + else: + self._calcfirst(label) + fset = self.first[label] + totalset.update(fset) + overlapcheck[label] = fset + else: + totalset[label] = 1 + overlapcheck[label] = {label: 1} + inverse = {} + for label, itsfirst in overlapcheck.items(): + for symbol in itsfirst: + if symbol in inverse: + raise ValueError("rule %s is ambiguous; %s is in the" + " first sets of %s as well as %s" % + (name, symbol, label, inverse[symbol])) + inverse[symbol] = label + self.first[name] = totalset + + def _parse(self): + dfas = {} + startsymbol = None + # MSTART: (NEWLINE | RULE)* ENDMARKER + while self.type != token.ENDMARKER: + while self.type == token.NEWLINE: + self._gettoken() + # RULE: NAME ':' RHS NEWLINE + name = self._expect(token.NAME) + self._expect(token.COLON) + a, z = self._parse_rhs() + self._expect(token.NEWLINE) + #self._dump_nfa(name, a, z) + dfa = self._make_dfa(a, z) + #self._dump_dfa(name, dfa) + # oldlen = len(dfa) + self._simplify_dfa(dfa) + # newlen = len(dfa) + dfas[name] = dfa + #print name, oldlen, newlen + if startsymbol is None: + startsymbol = name + return dfas, startsymbol + + def _make_dfa(self, start, finish): + # To turn an NFA into a DFA, we define the states of the DFA + # to correspond to *sets* of states of the NFA. Then do some + # state reduction. Let's represent sets as dicts with 1 for + # values. 
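# [Editor's note -- self-contained illustration, not part of this patch.] The
# subset construction performed below: each DFA state corresponds to a *set*
# of NFA states, and None-labelled arcs are epsilon moves folded in by a
# closure. The toy NFA here (roughly for "a b?") is hypothetical.
nfa_arcs = {0: [("a", 1)], 1: [(None, 2), ("b", 2)], 2: []}

def closure(states):
    # Follow epsilon (None) arcs until no new NFA states can be added.
    result, todo = set(states), list(states)
    while todo:
        for label, nxt in nfa_arcs[todo.pop()]:
            if label is None and nxt not in result:
                result.add(nxt)
                todo.append(nxt)
    return frozenset(result)

start = closure({0})
dfa_states, dfa_arcs = [start], {}
for dfa_state in dfa_states:  # the list grows while iterating, as in _make_dfa
    moves = {}
    for nfa_state in dfa_state:
        for label, nxt in nfa_arcs[nfa_state]:
            if label is not None:
                moves.setdefault(label, set()).add(nxt)
    for label, targets in moves.items():
        new_state = closure(targets)
        if new_state not in dfa_states:
            dfa_states.append(new_state)
        dfa_arcs[(dfa_state, label)] = new_state

assert dfa_arcs[(start, "a")] == frozenset({1, 2})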
+ assert isinstance(start, NFAState) + assert isinstance(finish, NFAState) + + def closure(state): + base = {} + addclosure(state, base) + return base + + def addclosure(state, base): + assert isinstance(state, NFAState) + if state in base: + return + base[state] = 1 + for label, next in state.arcs: + if label is None: + addclosure(next, base) + + states = [DFAState(closure(start), finish)] + for state in states: # NB states grows while we're iterating + arcs = {} + for nfastate in state.nfaset: + for label, next in nfastate.arcs: + if label is not None: + addclosure(next, arcs.setdefault(label, {})) + for label, nfaset in arcs.items(): + for st in states: + if st.nfaset == nfaset: + break + else: + st = DFAState(nfaset, finish) + states.append(st) + state.addarc(st, label) + return states # List of DFAState instances; first one is start + + def _dump_nfa(self, name, start, finish): + print("Dump of NFA for", name) + todo = [start] + for i, state in enumerate(todo): + print(" State", i, state is finish and "(final)" or "") + for label, next in state.arcs: + if next in todo: + j = todo.index(next) + else: + j = len(todo) + todo.append(next) + if label is None: + print(" -> %d" % j) + else: + print(" %s -> %d" % (label, j)) + + def _dump_dfa(self, name, dfa): + print("Dump of DFA for", name) + for i, state in enumerate(dfa): + print(" State", i, state.isfinal and "(final)" or "") + for label, next in state.arcs.items(): + print(" %s -> %d" % (label, dfa.index(next))) + + def _simplify_dfa(self, dfa): + # This is not theoretically optimal, but works well enough. + # Algorithm: repeatedly look for two states that have the same + # set of arcs (same labels pointing to the same nodes) and + # unify them, until things stop changing. + + # dfa is a list of DFAState instances + changes = True + while changes: + changes = False + for i, state_i in enumerate(dfa): + for j in range(i + 1, len(dfa)): + state_j = dfa[j] + if state_i == state_j: + #print " unify", i, j + del dfa[j] + for state in dfa: + state.unifystate(state_j, state_i) + changes = True + break + + def _parse_rhs(self): + # RHS: ALT ('|' ALT)* + a, z = self._parse_alt() + if self.value != "|": + return a, z + else: + aa = NFAState() + zz = NFAState() + aa.addarc(a) + z.addarc(zz) + while self.value == "|": + self._gettoken() + a, z = self._parse_alt() + aa.addarc(a) + z.addarc(zz) + return aa, zz + + def _parse_alt(self): + # ALT: ITEM+ + a, b = self._parse_item() + while (self.value in ("(", "[") or + self.type in (token.NAME, token.STRING)): + c, d = self._parse_item() + b.addarc(c) + b = d + return a, b + + def _parse_item(self): + # ITEM: '[' RHS ']' | ATOM ['+' | '*'] + if self.value == "[": + self._gettoken() + a, z = self._parse_rhs() + self._expect(token.RSQB) + a.addarc(z) + return a, z + else: + a, z = self._parse_atom() + value = self.value + if value not in ("+", "*"): + return a, z + self._gettoken() + z.addarc(a) + if value == "+": + return a, z + else: + return a, a + + def _parse_atom(self): + # ATOM: '(' RHS ')' | NAME | STRING + if self.value == "(": + self._gettoken() + a, z = self._parse_rhs() + self._expect(token.RPAR) + return a, z + elif self.type in (token.NAME, token.STRING): + a = NFAState() + z = NFAState() + a.addarc(z, self.value) + self._gettoken() + return a, z + else: + self._raise_error("expected (...) 
or NAME or STRING, got %s/%s", + self.type, self.value) + + def _expect(self, type): + if self.type != type: + self._raise_error("expected %s(%s), got %s(%s)", + type, token.tok_name[type], self.type, self.value) + value = self.value + self._gettoken() + return value + + def _gettoken(self): + tup = next(self.generator) + while tup[0] in (token.COMMENT, token.NL): + tup = next(self.generator) + self.type, self.value, self.begin, prefix = tup + + def _raise_error(self, msg, *args): + if args: + try: + msg = msg % args + except: + msg = " ".join([msg] + list(map(str, args))) + line = self._bnf_text.splitlines()[self.begin[0] - 1] + raise SyntaxError(msg, ('', self.begin[0], + self.begin[1], line)) + + +class NFAState(object): + def __init__(self): + self.arcs = [] # list of (label, NFAState) pairs + + def addarc(self, next, label=None): + assert label is None or isinstance(label, str) + assert isinstance(next, NFAState) + self.arcs.append((label, next)) + + +class DFAState(object): + def __init__(self, nfaset, final): + assert isinstance(nfaset, dict) + assert isinstance(next(iter(nfaset)), NFAState) + assert isinstance(final, NFAState) + self.nfaset = nfaset + self.isfinal = final in nfaset + self.arcs = {} # map from label to DFAState + + def addarc(self, next, label): + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(next, DFAState) + self.arcs[label] = next + + def unifystate(self, old, new): + for label, next in self.arcs.items(): + if next is old: + self.arcs[label] = new + + def __eq__(self, other): + # Equality test -- ignore the nfaset instance variable + assert isinstance(other, DFAState) + if self.isfinal != other.isfinal: + return False + # Can't just return self.arcs == other.arcs, because that + # would invoke this method recursively, with cycles... + if len(self.arcs) != len(other.arcs): + return False + for label, next in self.arcs.items(): + if next is not other.arcs.get(label): + return False + return True + + __hash__ = None # For Py3 compatibility. + + +def generate_grammar(bnf_text, token_namespace): + """ + ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for + at-least-once repetition, [] for optional parts, | for alternatives and () + for grouping). + + It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its + own parser. + """ + p = ParserGenerator(bnf_text, token_namespace) + return p.make_grammar() diff --git a/pythonFiles/parso/python/__init__.py b/pythonFiles/parso/python/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/pythonFiles/parso/python/diff.py b/pythonFiles/parso/python/diff.py new file mode 100644 index 000000000000..f8b73c75d3d5 --- /dev/null +++ b/pythonFiles/parso/python/diff.py @@ -0,0 +1,594 @@ +""" +Basically a contains parser that is faster, because it tries to parse only +parts and if anything changes, it only reparses the changed parts. + +It works with a simple diff in the beginning and will try to reuse old parser +fragments. 
+""" +import re +import difflib +from collections import namedtuple +import logging + +from parso.utils import split_lines +from parso.python.parser import Parser +from parso.python.tree import EndMarker +from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT, + ENDMARKER, INDENT, DEDENT) + +LOG = logging.getLogger(__name__) + + +def _get_last_line(node_or_leaf): + last_leaf = node_or_leaf.get_last_leaf() + if _ends_with_newline(last_leaf): + return last_leaf.start_pos[0] + else: + return last_leaf.end_pos[0] + + +def _ends_with_newline(leaf, suffix=''): + if leaf.type == 'error_leaf': + typ = leaf.original_type + else: + typ = leaf.type + + return typ == 'newline' or suffix.endswith('\n') + + +def _flows_finished(pgen_grammar, stack): + """ + if, while, for and try might not be finished, because another part might + still be parsed. + """ + for dfa, newstate, (symbol_number, nodes) in stack: + if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt', + 'for_stmt', 'try_stmt'): + return False + return True + + +def suite_or_file_input_is_valid(pgen_grammar, stack): + if not _flows_finished(pgen_grammar, stack): + return False + + for dfa, newstate, (symbol_number, nodes) in reversed(stack): + if pgen_grammar.number2symbol[symbol_number] == 'suite': + # If only newline is in the suite, the suite is not valid, yet. + return len(nodes) > 1 + # Not reaching a suite means that we're dealing with file_input levels + # where there's no need for a valid statement in it. It can also be empty. + return True + + +def _is_flow_node(node): + try: + value = node.children[0].value + except AttributeError: + return False + return value in ('if', 'for', 'while', 'try') + + +class _PositionUpdatingFinished(Exception): + pass + + +def _update_positions(nodes, line_offset, last_leaf): + for node in nodes: + try: + children = node.children + except AttributeError: + # Is a leaf + node.line += line_offset + if node is last_leaf: + raise _PositionUpdatingFinished + else: + _update_positions(children, line_offset, last_leaf) + + +class DiffParser(object): + """ + An advanced form of parsing a file faster. Unfortunately comes with huge + side effects. It changes the given module. + """ + def __init__(self, pgen_grammar, tokenizer, module): + self._pgen_grammar = pgen_grammar + self._tokenizer = tokenizer + self._module = module + + def _reset(self): + self._copy_count = 0 + self._parser_count = 0 + + self._nodes_stack = _NodesStack(self._module) + + def update(self, old_lines, new_lines): + ''' + The algorithm works as follows: + + Equal: + - Assure that the start is a newline, otherwise parse until we get + one. + - Copy from parsed_until_line + 1 to max(i2 + 1) + - Make sure that the indentation is correct (e.g. add DEDENT) + - Add old and change positions + Insert: + - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not + much more. + + Returns the new module node. + ''' + LOG.debug('diff parser start') + # Reset the used names cache so they get regenerated. 
+ self._module._used_names = None + + self._parser_lines_new = new_lines + + self._reset() + + line_length = len(new_lines) + sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new) + opcodes = sm.get_opcodes() + LOG.debug('diff parser calculated') + LOG.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length)) + + for operation, i1, i2, j1, j2 in opcodes: + LOG.debug('diff code[%s] old[%s:%s] new[%s:%s]', + operation, i1 + 1, i2, j1 + 1, j2) + + if j2 == line_length and new_lines[-1] == '': + # The empty part after the last newline is not relevant. + j2 -= 1 + + if operation == 'equal': + line_offset = j1 - i1 + self._copy_from_old_parser(line_offset, i2, j2) + elif operation == 'replace': + self._parse(until_line=j2) + elif operation == 'insert': + self._parse(until_line=j2) + else: + assert operation == 'delete' + + # With this action all change will finally be applied and we have a + # changed module. + self._nodes_stack.close() + + last_pos = self._module.end_pos[0] + if last_pos != line_length: + current_lines = split_lines(self._module.get_code(), keepends=True) + diff = difflib.unified_diff(current_lines, new_lines) + raise Exception( + "There's an issue (%s != %s) with the diff parser. Please report:\n%s" + % (last_pos, line_length, ''.join(diff)) + ) + + LOG.debug('diff parser end') + return self._module + + def _enabled_debugging(self, old_lines, lines_new): + if self._module.get_code() != ''.join(lines_new): + LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), + ''.join(lines_new)) + + def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new): + copied_nodes = [None] + + last_until_line = -1 + while until_line_new > self._nodes_stack.parsed_until_line: + parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset + line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1) + if line_stmt is None: + # Parse 1 line at least. We don't need more, because we just + # want to get into a state where the old parser has statements + # again that can be copied (e.g. not lines within parentheses). + self._parse(self._nodes_stack.parsed_until_line + 1) + elif not copied_nodes: + # We have copied as much as possible (but definitely not too + # much). Therefore we just parse the rest. + # We might not reach the end, because there's a statement + # that is not finished. + self._parse(until_line_new) + else: + p_children = line_stmt.parent.children + index = p_children.index(line_stmt) + + copied_nodes = self._nodes_stack.copy_nodes( + p_children[index:], + until_line_old, + line_offset + ) + # Match all the nodes that are in the wanted range. + if copied_nodes: + self._copy_count += 1 + + from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset + to = self._nodes_stack.parsed_until_line + + LOG.debug('diff actually copy %s to %s', from_, to) + # Since there are potential bugs that might loop here endlessly, we + # just stop here. + assert last_until_line != self._nodes_stack.parsed_until_line \ + or not copied_nodes, last_until_line + last_until_line = self._nodes_stack.parsed_until_line + + def _get_old_line_stmt(self, old_line): + leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True) + + if _ends_with_newline(leaf): + leaf = leaf.get_next_leaf() + if leaf.get_start_pos_of_prefix()[0] == old_line: + node = leaf + while node.parent.type not in ('file_input', 'suite'): + node = node.parent + return node + # Must be on the same line. Otherwise we need to parse that bit. 
+ return None + + def _get_before_insertion_node(self): + if self._nodes_stack.is_empty(): + return None + + line = self._nodes_stack.parsed_until_line + 1 + node = self._new_module.get_last_leaf() + while True: + parent = node.parent + if parent.type in ('suite', 'file_input'): + assert node.end_pos[0] <= line + assert node.end_pos[1] == 0 or '\n' in self._prefix + return node + node = parent + + def _parse(self, until_line): + """ + Parses at least until the given line, but might just parse more until a + valid state is reached. + """ + last_until_line = 0 + while until_line > self._nodes_stack.parsed_until_line: + node = self._try_parse_part(until_line) + nodes = node.children + + self._nodes_stack.add_parsed_nodes(nodes) + LOG.debug( + 'parse_part from %s to %s (to %s in part parser)', + nodes[0].get_start_pos_of_prefix()[0], + self._nodes_stack.parsed_until_line, + node.end_pos[0] - 1 + ) + # Since the tokenizer sometimes has bugs, we cannot be sure that + # this loop terminates. Therefore assert that there's always a + # change. + assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line + last_until_line = self._nodes_stack.parsed_until_line + + def _try_parse_part(self, until_line): + """ + Sets up a normal parser that uses a spezialized tokenizer to only parse + until a certain position (or a bit longer if the statement hasn't + ended. + """ + self._parser_count += 1 + # TODO speed up, shouldn't copy the whole list all the time. + # memoryview? + parsed_until_line = self._nodes_stack.parsed_until_line + lines_after = self._parser_lines_new[parsed_until_line:] + #print('parse_content', parsed_until_line, lines_after, until_line) + tokens = self._diff_tokenize( + lines_after, + until_line, + line_offset=parsed_until_line + ) + self._active_parser = Parser( + self._pgen_grammar, + error_recovery=True + ) + return self._active_parser.parse(tokens=tokens) + + def _diff_tokenize(self, lines, until_line, line_offset=0): + is_first_token = True + omitted_first_indent = False + indents = [] + tokens = self._tokenizer(lines, (1, 0)) + stack = self._active_parser.pgen_parser.stack + for typ, string, start_pos, prefix in tokens: + start_pos = start_pos[0] + line_offset, start_pos[1] + if typ == INDENT: + indents.append(start_pos[1]) + if is_first_token: + omitted_first_indent = True + # We want to get rid of indents that are only here because + # we only parse part of the file. These indents would only + # get parsed as error leafs, which doesn't make any sense. + is_first_token = False + continue + is_first_token = False + + # In case of omitted_first_indent, it might not be dedented fully. + # However this is a sign for us that a dedent happened. + if typ == DEDENT \ + or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1: + indents.pop() + if omitted_first_indent and not indents: + # We are done here, only thing that can come now is an + # endmarker or another dedented code block. + typ, string, start_pos, prefix = next(tokens) + if '\n' in prefix: + prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix) + else: + prefix = '' + yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix) + break + elif typ == NEWLINE and start_pos[0] >= until_line: + yield PythonToken(typ, string, start_pos, prefix) + # Check if the parser is actually in a valid suite state. 
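# [Editor's note -- illustrative only, not part of this patch.] The tokens
# consumed and re-emitted in this generator are 4-tuples of
# (type, string, start_pos, prefix); the namedtuple below only mimics that
# shape, it is not parso's PythonToken.
from collections import namedtuple

Token = namedtuple("Token", ["type", "string", "start_pos", "prefix"])

# `prefix` carries the whitespace/comments in front of the token and
# `start_pos` is a (line, column) pair, matching what is yielded here.
tok = Token("NAME", "foo", (1, 4), "    ")
assert tok.prefix + tok.string == "    foo"
assert tok.start_pos == (1, 4)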
+ if suite_or_file_input_is_valid(self._pgen_grammar, stack): + start_pos = start_pos[0] + 1, 0 + while len(indents) > int(omitted_first_indent): + indents.pop() + yield PythonToken(DEDENT, '', start_pos, '') + + yield PythonToken(ENDMARKER, '', start_pos, '') + break + else: + continue + + yield PythonToken(typ, string, start_pos, prefix) + + +class _NodesStackNode(object): + ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf') + + def __init__(self, tree_node, parent=None): + self.tree_node = tree_node + self.children_groups = [] + self.parent = parent + + def close(self): + children = [] + for children_part, line_offset, last_line_offset_leaf in self.children_groups: + if line_offset != 0: + try: + _update_positions( + children_part, line_offset, last_line_offset_leaf) + except _PositionUpdatingFinished: + pass + children += children_part + self.tree_node.children = children + # Reset the parents + for node in children: + node.parent = self.tree_node + + def add(self, children, line_offset=0, last_line_offset_leaf=None): + group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf) + self.children_groups.append(group) + + def get_last_line(self, suffix): + line = 0 + if self.children_groups: + children_group = self.children_groups[-1] + last_leaf = children_group.children[-1].get_last_leaf() + line = last_leaf.end_pos[0] + + # Calculate the line offsets + offset = children_group.line_offset + if offset: + # In case the line_offset is not applied to this specific leaf, + # just ignore it. + if last_leaf.line <= children_group.last_line_offset_leaf.line: + line += children_group.line_offset + + # Newlines end on the next line, which means that they would cover + # the next line. That line is not fully parsed at this point. + if _ends_with_newline(last_leaf, suffix): + line -= 1 + line += suffix.count('\n') + if suffix and not suffix.endswith('\n'): + # This is the end of a file (that doesn't end with a newline). + line += 1 + return line + + +class _NodesStack(object): + endmarker_type = 'endmarker' + + def __init__(self, module): + # Top of stack + self._tos = self._base_node = _NodesStackNode(module) + self._module = module + self._last_prefix = '' + self.prefix = '' + + def is_empty(self): + return not self._base_node.children + + @property + def parsed_until_line(self): + return self._tos.get_last_line(self.prefix) + + def _get_insertion_node(self, indentation_node): + indentation = indentation_node.start_pos[1] + + # find insertion node + node = self._tos + while True: + tree_node = node.tree_node + if tree_node.type == 'suite': + # A suite starts with NEWLINE, ... + node_indentation = tree_node.children[1].start_pos[1] + + if indentation >= node_indentation: # Not a Dedent + # We might be at the most outer layer: modules. We + # don't want to depend on the first statement + # having the right indentation. + return node + + elif tree_node.type == 'file_input': + return node + + node = self._close_tos() + + def _close_tos(self): + self._tos.close() + self._tos = self._tos.parent + return self._tos + + def add_parsed_nodes(self, tree_nodes): + tree_nodes = self._remove_endmarker(tree_nodes) + if not tree_nodes: + return + + assert tree_nodes[0].type != 'newline' + + node = self._get_insertion_node(tree_nodes[0]) + assert node.tree_node.type in ('suite', 'file_input') + node.add(tree_nodes) + self._update_tos(tree_nodes[-1]) + + def _remove_endmarker(self, tree_nodes): + """ + Helps cleaning up the tree nodes that get inserted. 
+ """ + last_leaf = tree_nodes[-1].get_last_leaf() + is_endmarker = last_leaf.type == self.endmarker_type + self._last_prefix = '' + if is_endmarker: + try: + separation = last_leaf.prefix.rindex('\n') + 1 + except ValueError: + pass + else: + # Remove the whitespace part of the prefix after a newline. + # That is not relevant if parentheses were opened. Always parse + # until the end of a line. + last_leaf.prefix, self._last_prefix = \ + last_leaf.prefix[:separation], last_leaf.prefix[separation:] + + first_leaf = tree_nodes[0].get_first_leaf() + first_leaf.prefix = self.prefix + first_leaf.prefix + self.prefix = '' + + if is_endmarker: + self.prefix = last_leaf.prefix + + tree_nodes = tree_nodes[:-1] + return tree_nodes + + def copy_nodes(self, tree_nodes, until_line, line_offset): + """ + Copies tree nodes from the old parser tree. + + Returns the number of tree nodes that were copied. + """ + tos = self._get_insertion_node(tree_nodes[0]) + + new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset) + return new_nodes + + def _copy_nodes(self, tos, nodes, until_line, line_offset): + new_nodes = [] + + new_tos = tos + for node in nodes: + if node.start_pos[0] > until_line: + break + + if node.type == 'endmarker': + # We basically removed the endmarker, but we are not allowed to + # remove the newline at the end of the line, otherwise it's + # going to be missing. + try: + self.prefix = node.prefix[:node.prefix.rindex('\n') + 1] + except ValueError: + pass + # Endmarkers just distort all the checks below. Remove them. + break + + # TODO this check might take a bit of time for large files. We + # might want to change this to do more intelligent guessing or + # binary search. + if _get_last_line(node) > until_line: + # We can split up functions and classes later. + if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite': + new_nodes.append(node) + break + + new_nodes.append(node) + + if not new_nodes: + return [], tos + + last_node = new_nodes[-1] + line_offset_index = -1 + if last_node.type in ('classdef', 'funcdef'): + suite = last_node.children[-1] + if suite.type == 'suite': + suite_tos = _NodesStackNode(suite) + # Don't need to pass line_offset here, it's already done by the + # parent. + suite_nodes, recursive_tos = self._copy_nodes( + suite_tos, suite.children, until_line, line_offset) + if len(suite_nodes) < 2: + # A suite only with newline is not valid. + new_nodes.pop() + else: + suite_tos.parent = tos + new_tos = recursive_tos + line_offset_index = -2 + + elif (new_nodes[-1].type in ('error_leaf', 'error_node') or + _is_flow_node(new_nodes[-1])): + # Error leafs/nodes don't have a defined start/end. Error + # nodes might not end with a newline (e.g. if there's an + # open `(`). Therefore ignore all of them unless they are + # succeeded with valid parser state. + # If we copy flows at the end, they might be continued + # after the copy limit (in the new parser). + # In this while loop we try to remove until we find a newline. + new_nodes.pop() + while new_nodes: + last_node = new_nodes[-1] + if last_node.get_last_leaf().type == 'newline': + break + new_nodes.pop() + + if new_nodes: + try: + last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf() + except IndexError: + line_offset = 0 + # In this case we don't have to calculate an offset, because + # there's no children to be managed. 
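# [Editor's note -- tiny standalone illustration, not part of this patch.]
# Both _remove_endmarker() and the endmarker branch above split a prefix at
# its last newline so that only the whitespace of the unfinished last line is
# carried over; the string here is made up:
prefix = "  # trailing comment\n    "
separation = prefix.rindex("\n") + 1
kept, carried_over = prefix[:separation], prefix[separation:]
assert kept == "  # trailing comment\n"
assert carried_over == "    "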
+ last_line_offset_leaf = None + tos.add(new_nodes, line_offset, last_line_offset_leaf) + return new_nodes, new_tos + + def _update_tos(self, tree_node): + if tree_node.type in ('suite', 'file_input'): + self._tos = _NodesStackNode(tree_node, self._tos) + self._tos.add(list(tree_node.children)) + self._update_tos(tree_node.children[-1]) + elif tree_node.type in ('classdef', 'funcdef'): + self._update_tos(tree_node.children[-1]) + + def close(self): + while self._tos is not None: + self._close_tos() + + # Add an endmarker. + try: + last_leaf = self._module.get_last_leaf() + end_pos = list(last_leaf.end_pos) + except IndexError: + end_pos = [1, 0] + lines = split_lines(self.prefix) + assert len(lines) > 0 + if len(lines) == 1: + end_pos[1] += len(lines[0]) + else: + end_pos[0] += len(lines) - 1 + end_pos[1] = len(lines[-1]) + + endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix) + endmarker.parent = self._module + self._module.children.append(endmarker) diff --git a/pythonFiles/parso/python/errors.py b/pythonFiles/parso/python/errors.py new file mode 100644 index 000000000000..cfb8380ea743 --- /dev/null +++ b/pythonFiles/parso/python/errors.py @@ -0,0 +1,994 @@ +# -*- coding: utf-8 -*- +import codecs +import warnings +import re +from contextlib import contextmanager + +from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule +from parso.python.tree import search_ancestor +from parso.parser import ParserSyntaxError + +_BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt') +_STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist') +# This is the maximal block size given by python. +_MAX_BLOCK_SIZE = 20 +_MAX_INDENT_COUNT = 100 +ALLOWED_FUTURES = ( + 'all_feature_names', 'nested_scopes', 'generators', 'division', + 'absolute_import', 'with_statement', 'print_function', 'unicode_literals', +) + + +def _iter_stmts(scope): + """ + Iterates over all statements and splits up simple_stmt. + """ + for child in scope.children: + if child.type == 'simple_stmt': + for child2 in child.children: + if child2.type == 'newline' or child2 == ';': + continue + yield child2 + else: + yield child + + +def _get_comprehension_type(atom): + first, second = atom.children[:2] + if second.type == 'testlist_comp' and second.children[1].type == 'comp_for': + if first == '[': + return 'list comprehension' + else: + return 'generator expression' + elif second.type == 'dictorsetmaker' and second.children[-1].type == 'comp_for': + if second.children[1] == ':': + return 'dict comprehension' + else: + return 'set comprehension' + return None + + +def _is_future_import(import_from): + # It looks like a __future__ import that is relative is still a future + # import. That feels kind of odd, but whatever. + # if import_from.level != 0: + # return False + from_names = import_from.get_from_names() + return [n.value for n in from_names] == ['__future__'] + + +def _remove_parens(atom): + """ + Returns the inner part of an expression like `(foo)`. Also removes nested + parens. + """ + try: + children = atom.children + except AttributeError: + pass + else: + if len(children) == 3 and children[0] == '(': + return _remove_parens(atom.children[1]) + return atom + + +def _iter_params(parent_node): + return (n for n in parent_node.children if n.type == 'param') + + +def _is_future_import_first(import_from): + """ + Checks if the import is the first statement of a file. 
+ """ + found_docstring = False + for stmt in _iter_stmts(import_from.get_root_node()): + if stmt.type == 'string' and not found_docstring: + continue + found_docstring = True + + if stmt == import_from: + return True + if stmt.type == 'import_from' and _is_future_import(stmt): + continue + return False + + +def _iter_definition_exprs_from_lists(exprlist): + for child in exprlist.children[::2]: + if child.type == 'atom' and child.children[0] in ('(', '['): + testlist_comp = child.children[0] + if testlist_comp.type == 'testlist_comp': + for expr in _iter_definition_exprs_from_lists(testlist_comp): + yield expr + continue + elif child.children[0] == '[': + yield testlist_comp + continue + + yield child + +def _get_expr_stmt_definition_exprs(expr_stmt): + exprs = [] + for list_ in expr_stmt.children[:-2:2]: + if list_.type in ('testlist_star_expr', 'testlist'): + exprs += _iter_definition_exprs_from_lists(list_) + else: + exprs.append(list_) + return exprs + + +def _get_for_stmt_definition_exprs(for_stmt): + exprlist = for_stmt.children[1] + if exprlist.type != 'exprlist': + return [exprlist] + return list(_iter_definition_exprs_from_lists(exprlist)) + + +class _Context(object): + def __init__(self, node, add_syntax_error, parent_context=None): + self.node = node + self.blocks = [] + self.parent_context = parent_context + self._used_name_dict = {} + self._global_names = [] + self._nonlocal_names = [] + self._nonlocal_names_in_subscopes = [] + self._add_syntax_error = add_syntax_error + + def is_async_funcdef(self): + # Stupidly enough async funcdefs can have two different forms, + # depending if a decorator is used or not. + return self.is_function() \ + and self.node.parent.type in ('async_funcdef', 'async_stmt') + + def is_function(self): + return self.node.type == 'funcdef' + + def add_name(self, name): + parent_type = name.parent.type + if parent_type == 'trailer': + # We are only interested in first level names. + return + + if parent_type == 'global_stmt': + self._global_names.append(name) + elif parent_type == 'nonlocal_stmt': + self._nonlocal_names.append(name) + else: + self._used_name_dict.setdefault(name.value, []).append(name) + + def finalize(self): + """ + Returns a list of nonlocal names that need to be part of that scope. + """ + self._analyze_names(self._global_names, 'global') + self._analyze_names(self._nonlocal_names, 'nonlocal') + + # Python2.6 doesn't have dict comprehensions. 
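# [Editor's note -- standalone illustration, not part of this patch.] The
# conflict reported below also makes CPython itself refuse to compile the
# same code, with the same message this check produces:
source = (
    "def outer():\n"
    "    x = 1\n"
    "    def inner():\n"
    "        global x\n"
    "        nonlocal x\n"
    "        x = 2\n"
)
try:
    compile(source, "<example>", "exec")
except SyntaxError as exc:
    print(exc.msg)  # name 'x' is nonlocal and global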
+ global_name_strs = dict((n.value, n) for n in self._global_names) + for nonlocal_name in self._nonlocal_names: + try: + global_name = global_name_strs[nonlocal_name.value] + except KeyError: + continue + + message = "name '%s' is nonlocal and global" % global_name.value + if global_name.start_pos < nonlocal_name.start_pos: + error_name = global_name + else: + error_name = nonlocal_name + self._add_syntax_error(error_name, message) + + nonlocals_not_handled = [] + for nonlocal_name in self._nonlocal_names_in_subscopes: + search = nonlocal_name.value + if search in global_name_strs or self.parent_context is None: + message = "no binding for nonlocal '%s' found" % nonlocal_name.value + self._add_syntax_error(nonlocal_name, message) + elif not self.is_function() or \ + nonlocal_name.value not in self._used_name_dict: + nonlocals_not_handled.append(nonlocal_name) + return self._nonlocal_names + nonlocals_not_handled + + def _analyze_names(self, globals_or_nonlocals, type_): + def raise_(message): + self._add_syntax_error(base_name, message % (base_name.value, type_)) + + params = [] + if self.node.type == 'funcdef': + params = self.node.get_params() + + for base_name in globals_or_nonlocals: + found_global_or_nonlocal = False + # Somehow Python does it the reversed way. + for name in reversed(self._used_name_dict.get(base_name.value, [])): + if name.start_pos > base_name.start_pos: + # All following names don't have to be checked. + found_global_or_nonlocal = True + + parent = name.parent + if parent.type == 'param' and parent.name == name: + # Skip those here, these definitions belong to the next + # scope. + continue + + if name.is_definition(): + if parent.type == 'expr_stmt' \ + and parent.children[1].type == 'annassign': + if found_global_or_nonlocal: + # If it's after the global the error seems to be + # placed there. + base_name = name + raise_("annotated name '%s' can't be %s") + break + else: + message = "name '%s' is assigned to before %s declaration" + else: + message = "name '%s' is used prior to %s declaration" + + if not found_global_or_nonlocal: + raise_(message) + # Only add an error for the first occurence. + break + + for param in params: + if param.name.value == base_name.value: + raise_("name '%s' is parameter and %s"), + + @contextmanager + def add_block(self, node): + self.blocks.append(node) + yield + self.blocks.pop() + + def add_context(self, node): + return _Context(node, self._add_syntax_error, parent_context=self) + + def close_child_context(self, child_context): + self._nonlocal_names_in_subscopes += child_context.finalize() + + +class ErrorFinder(Normalizer): + """ + Searches for errors in the syntax tree. + """ + def __init__(self, *args, **kwargs): + super(ErrorFinder, self).__init__(*args, **kwargs) + self._error_dict = {} + self.version = self.grammar.version_info + + def initialize(self, node): + def create_context(node): + if node is None: + return None + + parent_context = create_context(node.parent) + if node.type in ('classdef', 'funcdef', 'file_input'): + return _Context(node, self._add_syntax_error, parent_context) + return parent_context + + self.context = create_context(node) or _Context(node, self._add_syntax_error) + self._indentation_count = 0 + + def visit(self, node): + if node.type == 'error_node': + with self.visit_node(node): + # Don't need to investigate the inners of an error node. We + # might find errors in there that should be ignored, because + # the error node itself already shows that there's an issue. 
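# [Editor's note -- minimal sketch, not part of this patch.] The
# @ErrorFinder.register_rule(...) decorators used throughout this file follow
# a type-keyed registry pattern; the real machinery lives in parso.normalizer
# and differs in detail, and the MiniFinder names below are made up.
class MiniFinder(object):
    _rules_by_type = {}

    @classmethod
    def register_rule(cls, type):
        def decorator(rule_cls):
            cls._rules_by_type.setdefault(type, []).append(rule_cls)
            return rule_cls
        return decorator

    def visit(self, node_type, node):
        issues = []
        for rule_cls in self._rules_by_type.get(node_type, []):
            rule = rule_cls()
            if rule.is_issue(node):
                issues.append(rule.message)
        return issues


@MiniFinder.register_rule(type="error_node")
class MiniInvalidSyntax(object):
    message = "SyntaxError: invalid syntax"

    def is_issue(self, node):
        return True


assert MiniFinder().visit("error_node", None) == ["SyntaxError: invalid syntax"]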
+ return '' + return super(ErrorFinder, self).visit(node) + + + @contextmanager + def visit_node(self, node): + self._check_type_rules(node) + + if node.type in _BLOCK_STMTS: + with self.context.add_block(node): + if len(self.context.blocks) == _MAX_BLOCK_SIZE: + self._add_syntax_error(node, "too many statically nested blocks") + yield + return + elif node.type == 'suite': + self._indentation_count += 1 + if self._indentation_count == _MAX_INDENT_COUNT: + self._add_indentation_error(node.children[1], "too many levels of indentation") + + yield + + if node.type == 'suite': + self._indentation_count -= 1 + elif node.type in ('classdef', 'funcdef'): + context = self.context + self.context = context.parent_context + self.context.close_child_context(context) + + def visit_leaf(self, leaf): + if leaf.type == 'error_leaf': + if leaf.original_type in ('indent', 'error_dedent'): + # Indents/Dedents itself never have a prefix. They are just + # "pseudo" tokens that get removed by the syntax tree later. + # Therefore in case of an error we also have to check for this. + spacing = list(leaf.get_next_leaf()._split_prefix())[-1] + if leaf.original_type == 'indent': + message = 'unexpected indent' + else: + message = 'unindent does not match any outer indentation level' + self._add_indentation_error(spacing, message) + else: + if leaf.value.startswith('\\'): + message = 'unexpected character after line continuation character' + else: + match = re.match('\\w{,2}("{1,3}|\'{1,3})', leaf.value) + if match is None: + message = 'invalid syntax' + else: + if len(match.group(1)) == 1: + message = 'EOL while scanning string literal' + else: + message = 'EOF while scanning triple-quoted string literal' + self._add_syntax_error(leaf, message) + return '' + elif leaf.value == ':': + parent = leaf.parent + if parent.type in ('classdef', 'funcdef'): + self.context = self.context.add_context(parent) + + # The rest is rule based. + return super(ErrorFinder, self).visit_leaf(leaf) + + def _add_indentation_error(self, spacing, message): + self.add_issue(spacing, 903, "IndentationError: " + message) + + def _add_syntax_error(self, node, message): + self.add_issue(node, 901, "SyntaxError: " + message) + + def add_issue(self, node, code, message): + # Overwrite the default behavior. + # Check if the issues are on the same line. + line = node.start_pos[0] + args = (code, message, node) + self._error_dict.setdefault(line, args) + + def finalize(self): + self.context.finalize() + + for code, message, node in self._error_dict.values(): + self.issues.append(Issue(node, code, message)) + + +class IndentationRule(Rule): + code = 903 + + def _get_message(self, message): + message = super(IndentationRule, self)._get_message(message) + return "IndentationError: " + message + + +@ErrorFinder.register_rule(type='error_node') +class _ExpectIndentedBlock(IndentationRule): + message = 'expected an indented block' + + def get_node(self, node): + leaf = node.get_next_leaf() + return list(leaf._split_prefix())[-1] + + def is_issue(self, node): + # This is the beginning of a suite that is not indented. 
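# [Editor's note -- standalone examples, not part of this patch.] Inputs of the
# kind these indentation rules target; CPython reports closely matching
# IndentationErrors (exact wording varies slightly between versions):
samples = [
    "if True:\npass\n",    # expected an indented block
    "x = 1\n    y = 2\n",  # unexpected indent
]
for source in samples:
    try:
        compile(source, "<example>", "exec")
    except IndentationError as exc:
        print(repr(source), "->", exc.msg)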
+ return node.children[-1].type == 'newline' + + +class ErrorFinderConfig(NormalizerConfig): + normalizer_class = ErrorFinder + + +class SyntaxRule(Rule): + code = 901 + + def _get_message(self, message): + message = super(SyntaxRule, self)._get_message(message) + return "SyntaxError: " + message + + +@ErrorFinder.register_rule(type='error_node') +class _InvalidSyntaxRule(SyntaxRule): + message = "invalid syntax" + + def get_node(self, node): + return node.get_next_leaf() + + def is_issue(self, node): + # Error leafs will be added later as an error. + return node.get_next_leaf().type != 'error_leaf' + + +@ErrorFinder.register_rule(value='await') +class _AwaitOutsideAsync(SyntaxRule): + message = "'await' outside async function" + + def is_issue(self, leaf): + return not self._normalizer.context.is_async_funcdef() + + def get_error_node(self, node): + # Return the whole await statement. + return node.parent + + +@ErrorFinder.register_rule(value='break') +class _BreakOutsideLoop(SyntaxRule): + message = "'break' outside loop" + + def is_issue(self, leaf): + in_loop = False + for block in self._normalizer.context.blocks: + if block.type in ('for_stmt', 'while_stmt'): + in_loop = True + return not in_loop + + +@ErrorFinder.register_rule(value='continue') +class _ContinueChecks(SyntaxRule): + message = "'continue' not properly in loop" + message_in_finally = "'continue' not supported inside 'finally' clause" + + def is_issue(self, leaf): + in_loop = False + for block in self._normalizer.context.blocks: + if block.type in ('for_stmt', 'while_stmt'): + in_loop = True + if block.type == 'try_stmt': + last_block = block.children[-3] + if last_block == 'finally' and leaf.start_pos > last_block.start_pos: + self.add_issue(leaf, message=self.message_in_finally) + return False # Error already added + if not in_loop: + return True + + +@ErrorFinder.register_rule(value='from') +class _YieldFromCheck(SyntaxRule): + message = "'yield from' inside async function" + + def get_node(self, leaf): + return leaf.parent.parent # This is the actual yield statement. + + def is_issue(self, leaf): + return leaf.parent.type == 'yield_arg' \ + and self._normalizer.context.is_async_funcdef() + + +@ErrorFinder.register_rule(type='name') +class _NameChecks(SyntaxRule): + message = 'cannot assign to __debug__' + message_keyword = 'assignment to keyword' + message_none = 'cannot assign to None' + + def is_issue(self, leaf): + self._normalizer.context.add_name(leaf) + + if leaf.value == '__debug__' and leaf.is_definition(): + if self._normalizer.version < (3, 0): + return True + else: + self.add_issue(leaf, message=self.message_keyword) + if leaf.value == 'None' and self._normalizer.version < (3, 0) \ + and leaf.is_definition(): + self.add_issue(leaf, message=self.message_none) + + +@ErrorFinder.register_rule(type='string') +class _StringChecks(SyntaxRule): + message = "bytes can only contain ASCII literal characters." + + def is_issue(self, leaf): + string_prefix = leaf.string_prefix.lower() + if 'b' in string_prefix \ + and self._normalizer.version >= (3, 0) \ + and any(c for c in leaf.value if ord(c) > 127): + # b'ä' + return True + + if 'r' not in string_prefix: + # Raw strings don't need to be checked if they have proper + # escaping. 
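# [Editor's note -- standalone illustration, not part of this patch.] The check
# just below decodes the literal's payload with the (unicode-)escape codec;
# malformed escapes such as a truncated \xNN surface as decode errors, which
# the rule then reports as "(unicode error) ..." messages.
import codecs

def escape_problem(payload):
    """Return the decode error for a bad escape sequence, or None."""
    try:
        codecs.unicode_escape_decode(payload)
    except (UnicodeDecodeError, ValueError) as exc:
        return str(exc)
    return None

assert escape_problem("\\n\\t ok") is None
print(escape_problem("\\x4"))  # truncated \xXX escape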
+ is_bytes = self._normalizer.version < (3, 0) + if 'b' in string_prefix: + is_bytes = True + if 'u' in string_prefix: + is_bytes = False + + payload = leaf._get_payload() + if is_bytes: + payload = payload.encode('utf-8') + func = codecs.escape_decode + else: + func = codecs.unicode_escape_decode + + try: + with warnings.catch_warnings(): + # The warnings from parsing strings are not relevant. + warnings.filterwarnings('ignore') + func(payload) + except UnicodeDecodeError as e: + self.add_issue(leaf, message='(unicode error) ' + str(e)) + except ValueError as e: + self.add_issue(leaf, message='(value error) ' + str(e)) + + +@ErrorFinder.register_rule(value='*') +class _StarCheck(SyntaxRule): + message = "named arguments must follow bare *" + + def is_issue(self, leaf): + params = leaf.parent + if params.type == 'parameters' and params: + after = params.children[params.children.index(leaf) + 1:] + after = [child for child in after + if child not in (',', ')') and not child.star_count] + return len(after) == 0 + + +@ErrorFinder.register_rule(value='**') +class _StarStarCheck(SyntaxRule): + # e.g. {**{} for a in [1]} + # TODO this should probably get a better end_pos including + # the next sibling of leaf. + message = "dict unpacking cannot be used in dict comprehension" + + def is_issue(self, leaf): + if leaf.parent.type == 'dictorsetmaker': + comp_for = leaf.get_next_sibling().get_next_sibling() + return comp_for is not None and comp_for.type == 'comp_for' + + +@ErrorFinder.register_rule(value='yield') +@ErrorFinder.register_rule(value='return') +class _ReturnAndYieldChecks(SyntaxRule): + message = "'return' with value in async generator" + message_async_yield = "'yield' inside async function" + + def get_node(self, leaf): + return leaf.parent + + def is_issue(self, leaf): + if self._normalizer.context.node.type != 'funcdef': + self.add_issue(self.get_node(leaf), message="'%s' outside function" % leaf.value) + elif self._normalizer.context.is_async_funcdef() \ + and any(self._normalizer.context.node.iter_yield_exprs()): + if leaf.value == 'return' and leaf.parent.type == 'return_stmt': + return True + elif leaf.value == 'yield' \ + and leaf.get_next_leaf() != 'from' \ + and self._normalizer.version == (3, 5): + self.add_issue(self.get_node(leaf), message=self.message_async_yield) + + +@ErrorFinder.register_rule(type='strings') +class _BytesAndStringMix(SyntaxRule): + # e.g. 's' b'' + message = "cannot mix bytes and nonbytes literals" + + def _is_bytes_literal(self, string): + return 'b' in string.string_prefix.lower() + + def is_issue(self, node): + first = node.children[0] + if first.type == 'string' and self._normalizer.version >= (3, 0): + first_is_bytes = self._is_bytes_literal(first) + for string in node.children[1:]: + if first_is_bytes != self._is_bytes_literal(string): + return True + + +@ErrorFinder.register_rule(type='import_as_names') +class _TrailingImportComma(SyntaxRule): + # e.g. 
from foo import a, + message = "trailing comma not allowed without surrounding parentheses" + + def is_issue(self, node): + if node.children[-1] == ',': + return True + + +@ErrorFinder.register_rule(type='import_from') +class _ImportStarInFunction(SyntaxRule): + message = "import * only allowed at module level" + + def is_issue(self, node): + return node.is_star_import() and self._normalizer.context.parent_context is not None + + +@ErrorFinder.register_rule(type='import_from') +class _FutureImportRule(SyntaxRule): + message = "from __future__ imports must occur at the beginning of the file" + + def is_issue(self, node): + if _is_future_import(node): + if not _is_future_import_first(node): + return True + + for from_name, future_name in node.get_paths(): + name = future_name.value + allowed_futures = list(ALLOWED_FUTURES) + if self._normalizer.version >= (3, 5): + allowed_futures.append('generator_stop') + + if name == 'braces': + self.add_issue(node, message = "not a chance") + elif name == 'barry_as_FLUFL': + m = "Seriously I'm not implementing this :) ~ Dave" + self.add_issue(node, message=m) + elif name not in ALLOWED_FUTURES: + message = "future feature %s is not defined" % name + self.add_issue(node, message=message) + + +@ErrorFinder.register_rule(type='star_expr') +class _StarExprRule(SyntaxRule): + message = "starred assignment target must be in a list or tuple" + message_iterable_unpacking = "iterable unpacking cannot be used in comprehension" + message_assignment = "can use starred expression only as assignment target" + + def is_issue(self, node): + if node.parent.type not in _STAR_EXPR_PARENTS: + return True + if node.parent.type == 'testlist_comp': + # [*[] for a in [1]] + if node.parent.children[1].type == 'comp_for': + self.add_issue(node, message=self.message_iterable_unpacking) + if self._normalizer.version <= (3, 4): + n = search_ancestor(node, 'for_stmt', 'expr_stmt') + found_definition = False + if n is not None: + if n.type == 'expr_stmt': + exprs = _get_expr_stmt_definition_exprs(n) + else: + exprs = _get_for_stmt_definition_exprs(n) + if node in exprs: + found_definition = True + + if not found_definition: + self.add_issue(node, message=self.message_assignment) + + +@ErrorFinder.register_rule(types=_STAR_EXPR_PARENTS) +class _StarExprParentRule(SyntaxRule): + def is_issue(self, node): + if node.parent.type == 'del_stmt': + self.add_issue(node.parent, message="can't use starred expression here") + else: + def is_definition(node, ancestor): + if ancestor is None: + return False + + type_ = ancestor.type + if type_ == 'trailer': + return False + + if type_ == 'expr_stmt': + return node.start_pos < ancestor.children[-1].start_pos + + return is_definition(node, ancestor.parent) + + if is_definition(node, node.parent): + args = [c for c in node.children if c != ','] + starred = [c for c in args if c.type == 'star_expr'] + if len(starred) > 1: + message = "two starred expressions in assignment" + self.add_issue(starred[1], message=message) + elif starred: + count = args.index(starred[0]) + if count >= 256: + message = "too many expressions in star-unpacking assignment" + self.add_issue(starred[0], message=message) + + +@ErrorFinder.register_rule(type='annassign') +class _AnnotatorRule(SyntaxRule): + # True: int + # {}: float + message = "illegal target for annotation" + + def get_node(self, node): + return node.parent + + def is_issue(self, node): + type_ = None + lhs = node.parent.children[0] + lhs = _remove_parens(lhs) + try: + children = lhs.children + except 
AttributeError: + pass + else: + if ',' in children or lhs.type == 'atom' and children[0] == '(': + type_ = 'tuple' + elif lhs.type == 'atom' and children[0] == '[': + type_ = 'list' + trailer = children[-1] + + if type_ is None: + if not (lhs.type == 'name' + # subscript/attributes are allowed + or lhs.type in ('atom_expr', 'power') + and trailer.type == 'trailer' + and trailer.children[0] != '('): + return True + else: + # x, y: str + message = "only single target (not %s) can be annotated" + self.add_issue(lhs.parent, message=message % type_) + + +@ErrorFinder.register_rule(type='argument') +class _ArgumentRule(SyntaxRule): + def is_issue(self, node): + first = node.children[0] + if node.children[1] == '=' and first.type != 'name': + if first.type == 'lambdef': + # f(lambda: 1=1) + message = "lambda cannot contain assignment" + else: + # f(+x=1) + message = "keyword can't be an expression" + self.add_issue(first, message=message) + + +@ErrorFinder.register_rule(type='nonlocal_stmt') +class _NonlocalModuleLevelRule(SyntaxRule): + message = "nonlocal declaration not allowed at module level" + + def is_issue(self, node): + return self._normalizer.context.parent_context is None + + +@ErrorFinder.register_rule(type='arglist') +class _ArglistRule(SyntaxRule): + @property + def message(self): + if self._normalizer.version < (3, 7): + return "Generator expression must be parenthesized if not sole argument" + else: + return "Generator expression must be parenthesized" + + def is_issue(self, node): + first_arg = node.children[0] + if first_arg.type == 'argument' \ + and first_arg.children[1].type == 'comp_for': + # e.g. foo(x for x in [], b) + return len(node.children) >= 2 + else: + arg_set = set() + kw_only = False + kw_unpacking_only = False + is_old_starred = False + # In python 3 this would be a bit easier (stars are part of + # argument), but we have to understand both. + for argument in node.children: + if argument == ',': + continue + + if argument in ('*', '**'): + # Python < 3.5 has the order engraved in the grammar + # file. No need to do anything here. + is_old_starred = True + continue + if is_old_starred: + is_old_starred = False + continue + + if argument.type == 'argument': + first = argument.children[0] + if first in ('*', '**'): + if first == '*': + if kw_unpacking_only: + # foo(**kwargs, *args) + message = "iterable argument unpacking follows keyword argument unpacking" + self.add_issue(argument, message=message) + else: + kw_unpacking_only = True + else: # Is a keyword argument. 
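# [Editor's note -- standalone examples, not part of this patch.] Call sites of
# the kind _ArglistRule flags; CPython rejects the same calls with closely
# matching messages:
samples = [
    "f(x=1, x=2)",         # keyword argument repeated
    "f(**kwargs, *args)",  # iterable unpacking follows keyword unpacking
    "f(x=2, y)",           # positional argument follows keyword argument
]
for source in samples:
    try:
        compile(source, "<example>", "eval")
    except SyntaxError as exc:
        print(source, "->", exc.msg)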
+ kw_only = True + if first.type == 'name': + if first.value in arg_set: + # f(x=1, x=2) + self.add_issue(first, message="keyword argument repeated") + else: + arg_set.add(first.value) + else: + if kw_unpacking_only: + # f(**x, y) + message = "positional argument follows keyword argument unpacking" + self.add_issue(argument, message=message) + elif kw_only: + # f(x=2, y) + message = "positional argument follows keyword argument" + self.add_issue(argument, message=message) + +@ErrorFinder.register_rule(type='parameters') +@ErrorFinder.register_rule(type='lambdef') +class _ParameterRule(SyntaxRule): + # def f(x=3, y): pass + message = "non-default argument follows default argument" + + def is_issue(self, node): + param_names = set() + default_only = False + for p in _iter_params(node): + if p.name.value in param_names: + message = "duplicate argument '%s' in function definition" + self.add_issue(p.name, message=message % p.name.value) + param_names.add(p.name.value) + + if p.default is None and not p.star_count: + if default_only: + return True + else: + default_only = True + + +@ErrorFinder.register_rule(type='try_stmt') +class _TryStmtRule(SyntaxRule): + message = "default 'except:' must be last" + + def is_issue(self, try_stmt): + default_except = None + for except_clause in try_stmt.children[3::3]: + if except_clause in ('else', 'finally'): + break + if except_clause == 'except': + default_except = except_clause + elif default_except is not None: + self.add_issue(default_except, message=self.message) + + +@ErrorFinder.register_rule(type='fstring') +class _FStringRule(SyntaxRule): + _fstring_grammar = None + message_nested = "f-string: expressions nested too deeply" + message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'" + + def _check_format_spec(self, format_spec, depth): + self._check_fstring_contents(format_spec.children[1:], depth) + + def _check_fstring_expr(self, fstring_expr, depth): + if depth >= 2: + self.add_issue(fstring_expr, message=self.message_nested) + + conversion = fstring_expr.children[2] + if conversion.type == 'fstring_conversion': + name = conversion.children[1] + if name.value not in ('s', 'r', 'a'): + self.add_issue(name, message=self.message_conversion) + + format_spec = fstring_expr.children[-2] + if format_spec.type == 'fstring_format_spec': + self._check_format_spec(format_spec, depth + 1) + + def is_issue(self, fstring): + self._check_fstring_contents(fstring.children[1:-1]) + + def _check_fstring_contents(self, children, depth=0): + for fstring_content in children: + if fstring_content.type == 'fstring_expr': + self._check_fstring_expr(fstring_content, depth) + + +class _CheckAssignmentRule(SyntaxRule): + def _check_assignment(self, node, is_deletion=False): + error = None + type_ = node.type + if type_ == 'lambdef': + error = 'lambda' + elif type_ == 'atom': + first, second = node.children[:2] + error = _get_comprehension_type(node) + if error is None: + if second.type == 'dictorsetmaker': + error = 'literal' + elif first in ('(', '['): + if second.type == 'yield_expr': + error = 'yield expression' + elif second.type == 'testlist_comp': + # This is not a comprehension, they were handled + # further above. + for child in second.children[::2]: + self._check_assignment(child, is_deletion) + else: # Everything handled, must be useless brackets. 
+ self._check_assignment(second, is_deletion) + elif type_ == 'keyword': + error = 'keyword' + elif type_ == 'operator': + if node.value == '...': + error = 'Ellipsis' + elif type_ == 'comparison': + error = 'comparison' + elif type_ in ('string', 'number', 'strings'): + error = 'literal' + elif type_ == 'yield_expr': + # This one seems to be a slightly different warning in Python. + message = 'assignment to yield expression not possible' + self.add_issue(node, message=message) + elif type_ == 'test': + error = 'conditional expression' + elif type_ in ('atom_expr', 'power'): + if node.children[0] == 'await': + error = 'await expression' + elif node.children[-2] == '**': + error = 'operator' + else: + # Has a trailer + trailer = node.children[-1] + assert trailer.type == 'trailer' + if trailer.children[0] == '(': + error = 'function call' + elif type_ in ('testlist_star_expr', 'exprlist', 'testlist'): + for child in node.children[::2]: + self._check_assignment(child, is_deletion) + elif ('expr' in type_ and type_ != 'star_expr' # is a substring + or '_test' in type_ + or type_ in ('term', 'factor')): + error = 'operator' + + if error is not None: + message = "can't %s %s" % ("delete" if is_deletion else "assign to", error) + self.add_issue(node, message=message) + + +@ErrorFinder.register_rule(type='comp_for') +class _CompForRule(_CheckAssignmentRule): + message = "asynchronous comprehension outside of an asynchronous function" + + def is_issue(self, node): + # Some of the nodes here are already used, so no else if + expr_list = node.children[1 + int(node.children[0] == 'async')] + if expr_list.type != 'expr_list': # Already handled. + self._check_assignment(expr_list) + + return node.children[0] == 'async' \ + and not self._normalizer.context.is_async_funcdef() + + +@ErrorFinder.register_rule(type='expr_stmt') +class _ExprStmtRule(_CheckAssignmentRule): + message = "illegal expression for augmented assignment" + + def is_issue(self, node): + for before_equal in node.children[:-2:2]: + self._check_assignment(before_equal) + + augassign = node.children[1] + if augassign != '=' and augassign.type != 'annassign': # Is augassign. + return node.children[0].type in ('testlist_star_expr', 'atom', 'testlist') + + +@ErrorFinder.register_rule(type='with_item') +class _WithItemRule(_CheckAssignmentRule): + def is_issue(self, with_item): + self._check_assignment(with_item.children[2]) + + +@ErrorFinder.register_rule(type='del_stmt') +class _DelStmtRule(_CheckAssignmentRule): + def is_issue(self, del_stmt): + child = del_stmt.children[1] + + if child.type != 'expr_list': # Already handled. + self._check_assignment(child, is_deletion=True) + + +@ErrorFinder.register_rule(type='expr_list') +class _ExprListRule(_CheckAssignmentRule): + def is_issue(self, expr_list): + for expr in expr_list.children[::2]: + self._check_assignment(expr) + + +@ErrorFinder.register_rule(type='for_stmt') +class _ForStmtRule(_CheckAssignmentRule): + def is_issue(self, for_stmt): + # Some of the nodes here are already used, so no else if + expr_list = for_stmt.children[1] + if expr_list.type != 'expr_list': # Already handled. 
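
# Illustrative sketch, not part of the patch: _CheckAssignmentRule and the
# subclasses above reproduce CPython's "can't assign to ..." family of
# messages. Assuming the parso 0.2 grammar API, roughly:
import parso

grammar = parso.load_grammar(version='3.6')
for source in ('1 = x', 'f(x) = 1', 'del x + y', '[] += 1'):
    module = grammar.parse(source)
    print(source, [error.message for error in grammar.iter_errors(module)])
# Expected, roughly: "can't assign to literal", "can't assign to function call",
# "can't delete operator", "illegal expression for augmented assignment".
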
+ self._check_assignment(expr_list) diff --git a/pythonFiles/parso/python/grammar26.txt b/pythonFiles/parso/python/grammar26.txt new file mode 100644 index 000000000000..d9cede2e9da9 --- /dev/null +++ b/pythonFiles/parso/python/grammar26.txt @@ -0,0 +1,159 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Commands for Kees Blom's railroad program +#diagram:token NAME +#diagram:token NUMBER +#diagram:token STRING +#diagram:token NEWLINE +#diagram:token ENDMARKER +#diagram:token INDENT +#diagram:output\input python.bla +#diagram:token DEDENT +#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm +#diagram:rules + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test [',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item ':' suite +# Dave: Python2.6 actually defines a little bit of a different label called +# 'with_var'. However in 2.7+ this is the default. Apply it for +# consistency reasons. +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [('as' | ',') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [listmaker] ']' | + '{' [dictorsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | strings) +strings: STRING+ +listmaker: test ( list_for | (',' test)* [','] ) +# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the +# default. It's more consistent like this. +testlist_comp: test ( gen_for | (',' test)* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: expr (',' expr)* [','] +testlist: test (',' test)* [','] +# Dave: Rename from dictmaker to dictorsetmaker, because this is more +# consistent with the following grammars. 
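
# Illustrative aside, not part of the original grammar file: each of these
# per-version files is selected by the version passed to parso.load_grammar(),
# so load_grammar(version='2.6') is expected to load this file. Keeping symbol
# names aligned across versions (testlist_comp, dictorsetmaker, ...) is what
# lets parser.py map the 2.6-only symbols gen_for and list_for onto the same
# tree.CompFor class that comp_for uses in the newer grammars.
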
+dictorsetmaker: test ':' test (',' test ':' test)* [','] + +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +argument: test [gen_for] | test '=' test # Really [keyword '='] test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] + +gen_iter: gen_for | gen_if +gen_for: 'for' exprlist 'in' or_test [gen_iter] +gen_if: 'if' old_test [gen_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [testlist] diff --git a/pythonFiles/parso/python/grammar27.txt b/pythonFiles/parso/python/grammar27.txt new file mode 100644 index 000000000000..359f12b43e1f --- /dev/null +++ b/pythonFiles/parso/python/grammar27.txt @@ -0,0 +1,143 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test [',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [('as' | ',') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [listmaker] ']' | + '{' [dictorsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | strings) +strings: STRING+ +listmaker: test ( list_for | (',' test)* [','] ) +testlist_comp: test ( comp_for | (',' test)* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: expr (',' expr)* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | + (test (comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. 
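
# Illustrative aside, not part of the original grammar file: because of that
# LL(1) restriction, calls such as
#     f(x=1)      # the intended keyword argument
#     f(x.y=1)    # also matches "test '=' test" below, rejected afterwards
# are both accepted by the argument rule; the NAME-only check happens later
# (ast.c in CPython, _ArgumentRule in parso's errors.py).
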
+argument: test [comp_for] | test '=' test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] + +comp_iter: comp_for | comp_if +comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' old_test [comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [testlist] diff --git a/pythonFiles/parso/python/grammar33.txt b/pythonFiles/parso/python/grammar33.txt new file mode 100644 index 000000000000..3a5580926797 --- /dev/null +++ b/pythonFiles/parso/python/grammar33.txt @@ -0,0 +1,134 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' + ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' + ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' 
| '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +strings: STRING+ +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | + (test (comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. 
+argument: test [comp_for] | test '=' test # Really [keyword '='] test +comp_iter: comp_for | comp_if +comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/pythonFiles/parso/python/grammar34.txt b/pythonFiles/parso/python/grammar34.txt new file mode 100644 index 000000000000..324bba18753d --- /dev/null +++ b/pythonFiles/parso/python/grammar34.txt @@ -0,0 +1,134 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' + ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' + ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +strings: STRING+ +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | + (test (comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +argument: test [comp_for] | test '=' test # Really [keyword '='] test +comp_iter: comp_for | comp_if +comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/pythonFiles/parso/python/grammar35.txt b/pythonFiles/parso/python/grammar35.txt new file mode 100644 index 000000000000..5868b8f7031a --- /dev/null +++ b/pythonFiles/parso/python/grammar35.txt @@ -0,0 +1,153 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). 
If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens +# skipping python3.5 compatibility, in favour of 3.7 solution +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' + ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' + ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +strings: STRING+ +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
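
# Illustrative aside, not part of the original grammar file: concretely, the
# single argument rule below accepts, at the grammar level, e.g.
#     f(**kwargs, *args)      # iterable unpacking after keyword unpacking
#     f(x for x in y, 1)      # unparenthesized generator expression
# and in parso these orderings are rejected afterwards by _ArglistRule in
# errors.py rather than by ast.c.
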
+argument: ( test [comp_for] | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/pythonFiles/parso/python/grammar36.txt b/pythonFiles/parso/python/grammar36.txt new file mode 100644 index 000000000000..b82c1fec1145 --- /dev/null +++ b/pythonFiles/parso/python/grammar36.txt @@ -0,0 +1,157 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of +# skipping python3.5+ compatibility, in favour of 3.7 solution +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
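
# Illustrative aside, not part of the original grammar file: this grammar also
# makes 'async' optional in comp_for further below, so e.g.
#     [x async for x in aiter()]
# parses under this grammar; using it outside an "async def" is then reported
# by _CompForRule in errors.py ("asynchronous comprehension outside of an
# asynchronous function").
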
+argument: ( test [comp_for] | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/pythonFiles/parso/python/grammar37.txt b/pythonFiles/parso/python/grammar37.txt new file mode 100644 index 000000000000..7d112f79852b --- /dev/null +++ b/pythonFiles/parso/python/grammar37.txt @@ -0,0 +1,157 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of +# skipping python3.5+ compatibility, in favour of 3.7 solution +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' 
| '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
+argument: ( test [comp_for] | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/pythonFiles/parso/python/parser.py b/pythonFiles/parso/python/parser.py new file mode 100644 index 000000000000..7cdf987ab365 --- /dev/null +++ b/pythonFiles/parso/python/parser.py @@ -0,0 +1,265 @@ +from parso.python import tree +from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER, + STRING, tok_name, NAME, FSTRING_STRING, + FSTRING_START, FSTRING_END) +from parso.parser import BaseParser +from parso.pgen2.parse import token_to_ilabel + + +class Parser(BaseParser): + """ + This class is used to parse a Python file, it then divides them into a + class structure of different scopes. + + :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar. + """ + + node_map = { + 'expr_stmt': tree.ExprStmt, + 'classdef': tree.Class, + 'funcdef': tree.Function, + 'file_input': tree.Module, + 'import_name': tree.ImportName, + 'import_from': tree.ImportFrom, + 'break_stmt': tree.KeywordStatement, + 'continue_stmt': tree.KeywordStatement, + 'return_stmt': tree.ReturnStmt, + 'raise_stmt': tree.KeywordStatement, + 'yield_expr': tree.YieldExpr, + 'del_stmt': tree.KeywordStatement, + 'pass_stmt': tree.KeywordStatement, + 'global_stmt': tree.GlobalStmt, + 'nonlocal_stmt': tree.KeywordStatement, + 'print_stmt': tree.KeywordStatement, + 'assert_stmt': tree.AssertStmt, + 'if_stmt': tree.IfStmt, + 'with_stmt': tree.WithStmt, + 'for_stmt': tree.ForStmt, + 'while_stmt': tree.WhileStmt, + 'try_stmt': tree.TryStmt, + 'comp_for': tree.CompFor, + # Not sure if this is the best idea, but IMO it's the easiest way to + # avoid extreme amounts of work around the subtle difference of 2/3 + # grammar in list comoprehensions. + 'list_for': tree.CompFor, + # Same here. This just exists in Python 2.6. + 'gen_for': tree.CompFor, + 'decorator': tree.Decorator, + 'lambdef': tree.Lambda, + 'old_lambdef': tree.Lambda, + 'lambdef_nocond': tree.Lambda, + } + default_node = tree.PythonNode + + # Names/Keywords are handled separately + _leaf_map = { + STRING: tree.String, + NUMBER: tree.Number, + NEWLINE: tree.Newline, + ENDMARKER: tree.EndMarker, + FSTRING_STRING: tree.FStringString, + FSTRING_START: tree.FStringStart, + FSTRING_END: tree.FStringEnd, + } + + def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'): + super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery) + + self.syntax_errors = [] + self._omit_dedent_list = [] + self._indent_counter = 0 + + # TODO do print absolute import detection here. + # try: + # del python_grammar_no_print_statement.keywords["print"] + # except KeyError: + # pass # Doesn't exist in the Python 3 grammar. 
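
# Illustrative sketch, not part of the patch: node_map above is what gives the
# parse tree its typed nodes. Assuming the parso 0.2 API, a module containing a
# single function definition comes back as a tree.Module wrapping a
# tree.Function rather than generic PythonNodes:
import parso

module = parso.parse('def foo():\n    return 1\n', version='3.6')
funcdef = module.children[0]
print(module.type, funcdef.type)   # expected: 'file_input' 'funcdef'
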
+ + # if self.options["print_function"]: + # python_grammar = pygram.python_grammar_no_print_statement + # else: + + def parse(self, tokens): + if self._error_recovery: + if self._start_symbol != 'file_input': + raise NotImplementedError + + tokens = self._recovery_tokenize(tokens) + + node = super(Parser, self).parse(tokens) + + if self._start_symbol == 'file_input' != node.type: + # If there's only one statement, we get back a non-module. That's + # not what we want, we want a module, so we add it here: + node = self.convert_node( + self._pgen_grammar, + self._pgen_grammar.symbol2number['file_input'], + [node] + ) + + return node + + def convert_node(self, pgen_grammar, type, children): + """ + Convert raw node information to a PythonBaseNode instance. + + This is passed to the parser driver which calls it whenever a reduction of a + grammar rule produces a new complete node, so that the tree is build + strictly bottom-up. + """ + # TODO REMOVE symbol, we don't want type here. + symbol = pgen_grammar.number2symbol[type] + try: + return self.node_map[symbol](children) + except KeyError: + if symbol == 'suite': + # We don't want the INDENT/DEDENT in our parser tree. Those + # leaves are just cancer. They are virtual leaves and not real + # ones and therefore have pseudo start/end positions and no + # prefixes. Just ignore them. + children = [children[0]] + children[2:-1] + elif symbol == 'list_if': + # Make transitioning from 2 to 3 easier. + symbol = 'comp_if' + elif symbol == 'listmaker': + # Same as list_if above. + symbol = 'testlist_comp' + return self.default_node(symbol, children) + + def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos): + # print('leaf', repr(value), token.tok_name[type]) + if type == NAME: + if value in pgen_grammar.keywords: + return tree.Keyword(value, start_pos, prefix) + else: + return tree.Name(value, start_pos, prefix) + + return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix) + + def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix, + add_token_callback): + def get_symbol_and_nodes(stack): + for dfa, state, (type_, nodes) in stack: + symbol = pgen_grammar.number2symbol[type_] + yield symbol, nodes + + tos_nodes = stack.get_tos_nodes() + if tos_nodes: + last_leaf = tos_nodes[-1].get_last_leaf() + else: + last_leaf = None + + if self._start_symbol == 'file_input' and \ + (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value): + def reduce_stack(states, newstate): + # reduce + state = newstate + while states[state] == [(0, state)]: + self.pgen_parser._pop() + + dfa, state, (type_, nodes) = stack[-1] + states, first = dfa + + + # In Python statements need to end with a newline. But since it's + # possible (and valid in Python ) that there's no newline at the + # end of a file, we have to recover even if the user doesn't want + # error recovery. 
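
# Illustrative sketch, not part of the patch: this branch covers sources whose
# last statement has no trailing newline. Assuming Grammar.parse() exposes the
# error_recovery flag as in upstream parso 0.2, such input still parses without
# raising, because a NEWLINE transition is simulated here:
import parso

grammar = parso.load_grammar(version='3.6')
module = grammar.parse('x = 1', error_recovery=False)   # note: no final '\n'
print(module.children[-1].type)   # expected: 'endmarker'
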
+ #print('x', pprint.pprint(stack)) + ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value) + + dfa, state, (type_, nodes) = stack[-1] + symbol = pgen_grammar.number2symbol[type_] + states, first = dfa + arcs = states[state] + # Look for a state with this label + for i, newstate in arcs: + if ilabel == i: + if symbol == 'simple_stmt': + # This is basically shifting + stack[-1] = (dfa, newstate, (type_, nodes)) + + reduce_stack(states, newstate) + add_token_callback(typ, value, start_pos, prefix) + return + # Check if we're at the right point + #for symbol, nodes in get_symbol_and_nodes(stack): + # self.pgen_parser._pop() + + #break + break + #symbol = pgen_grammar.number2symbol[type_] + + if not self._error_recovery: + return super(Parser, self).error_recovery( + pgen_grammar, stack, arcs, typ, value, start_pos, prefix, + add_token_callback) + + def current_suite(stack): + # For now just discard everything that is not a suite or + # file_input, if we detect an error. + for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))): + # `suite` can sometimes be only simple_stmt, not stmt. + if symbol == 'file_input': + break + elif symbol == 'suite' and len(nodes) > 1: + # suites without an indent in them get discarded. + break + return index, symbol, nodes + + index, symbol, nodes = current_suite(stack) + + # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index) + if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos): + add_token_callback(typ, value, start_pos, prefix) + else: + if typ == INDENT: + # For every deleted INDENT we have to delete a DEDENT as well. + # Otherwise the parser will get into trouble and DEDENT too early. + self._omit_dedent_list.append(self._indent_counter) + + error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix) + stack[-1][2][1].append(error_leaf) + + if symbol == 'suite': + dfa, state, node = stack[-1] + states, first = dfa + arcs = states[state] + intended_label = pgen_grammar.symbol2label['stmt'] + # Introduce a proper state transition. We're basically allowing + # there to be no valid statements inside a suite. + if [x[0] for x in arcs] == [intended_label]: + new_state = arcs[0][1] + stack[-1] = dfa, new_state, node + + def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos): + failed_stack = False + found = False + all_nodes = [] + for dfa, state, (type_, nodes) in stack[start_index:]: + if nodes: + found = True + if found: + failed_stack = True + all_nodes += nodes + if failed_stack: + stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes)) + + stack[start_index:] = [] + return failed_stack + + def _recovery_tokenize(self, tokens): + for typ, value, start_pos, prefix in tokens: + # print(tok_name[typ], repr(value), start_pos, repr(prefix)) + if typ == DEDENT: + # We need to count indents, because if we just omit any DEDENT, + # we might omit them in the wrong place. 
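
# Illustrative sketch, not part of the patch: with error recovery enabled (the
# default), broken input ends up in the tree as error leaves/nodes instead of
# raising ParserSyntaxError. Assuming the parso 0.2 API:
import parso

module = parso.parse('def f(:\n    pass\nx = 1\n', version='3.6')
print([child.type for child in module.children])
# expected to include 'error_node' (tree.PythonErrorNode) alongside the
# statements that could still be recovered.
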
+ o = self._omit_dedent_list + if o and o[-1] == self._indent_counter: + o.pop() + continue + + self._indent_counter -= 1 + elif typ == INDENT: + self._indent_counter += 1 + yield typ, value, start_pos, prefix diff --git a/pythonFiles/parso/python/pep8.py b/pythonFiles/parso/python/pep8.py new file mode 100644 index 000000000000..59fe452d06c4 --- /dev/null +++ b/pythonFiles/parso/python/pep8.py @@ -0,0 +1,727 @@ +import re +from contextlib import contextmanager + +from parso.python.errors import ErrorFinder, ErrorFinderConfig +from parso.normalizer import Rule +from parso.python.tree import search_ancestor, Flow, Scope + + +_IMPORT_TYPES = ('import_name', 'import_from') +_SUITE_INTRODUCERS = ('classdef', 'funcdef', 'if_stmt', 'while_stmt', + 'for_stmt', 'try_stmt', 'with_stmt') +_NON_STAR_TYPES = ('term', 'import_from', 'power') +_OPENING_BRACKETS = '(', '[', '{' +_CLOSING_BRACKETS = ')', ']', '}' +_FACTOR = '+', '-', '~' +_ALLOW_SPACE = '*', '+', '-', '**', '/', '//', '@' +_BITWISE_OPERATOR = '<<', '>>', '|', '&', '^' +_NEEDS_SPACE = ('=', '%', '->', + '<', '>', '==', '>=', '<=', '<>', '!=', + '+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=', + '>>=', '**=', '//=') +_NEEDS_SPACE += _BITWISE_OPERATOR +_IMPLICIT_INDENTATION_TYPES = ('dictorsetmaker', 'argument') +_POSSIBLE_SLICE_PARENTS = ('subscript', 'subscriptlist', 'sliceop') + + +class IndentationTypes(object): + VERTICAL_BRACKET = object() + HANGING_BRACKET = object() + BACKSLASH = object() + SUITE = object() + IMPLICIT = object() + + +class IndentationNode(object): + type = IndentationTypes.SUITE + + def __init__(self, config, indentation, parent=None): + self.bracket_indentation = self.indentation = indentation + self.parent = parent + + def __repr__(self): + return '<%s>' % self.__class__.__name__ + + def get_latest_suite_node(self): + n = self + while n is not None: + if n.type == IndentationTypes.SUITE: + return n + + n = n.parent + + +class BracketNode(IndentationNode): + def __init__(self, config, leaf, parent, in_suite_introducer=False): + self.leaf = leaf + + # Figure out here what the indentation is. For chained brackets + # we can basically use the previous indentation. + previous_leaf = leaf + n = parent + if n.type == IndentationTypes.IMPLICIT: + n = n.parent + while True: + if hasattr(n, 'leaf') and previous_leaf.line != n.leaf.line: + break + + previous_leaf = previous_leaf.get_previous_leaf() + if not isinstance(n, BracketNode) or previous_leaf != n.leaf: + break + n = n.parent + parent_indentation = n.indentation + + + next_leaf = leaf.get_next_leaf() + if '\n' in next_leaf.prefix: + # This implies code like: + # foobarbaz( + # a, + # b, + # ) + self.bracket_indentation = parent_indentation \ + + config.closing_bracket_hanging_indentation + self.indentation = parent_indentation + config.indentation + self.type = IndentationTypes.HANGING_BRACKET + else: + # Implies code like: + # foobarbaz( + # a, + # b, + # ) + expected_end_indent = leaf.end_pos[1] + if '\t' in config.indentation: + self.indentation = None + else: + self.indentation = ' ' * expected_end_indent + self.bracket_indentation = self.indentation + self.type = IndentationTypes.VERTICAL_BRACKET + + if in_suite_introducer and parent.type == IndentationTypes.SUITE \ + and self.indentation == parent_indentation + config.indentation: + self.indentation += config.indentation + # The closing bracket should have the same indentation. 
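# Illustrative sketch (not from the patched file): the two bracket layouts that
# BracketNode above distinguishes, written out as plain snippets.
HANGING_INDENT = (            # newline right after the opening bracket
    "result = foo(\n"
    "    a,\n"
    "    b,\n"
    ")\n"
)
VISUAL_INDENT = (             # arguments aligned with the column after '('
    "result = foo(a,\n"
    "             b)\n"
)
# HANGING_BRACKET expects one extra indentation level inside the call;
# VERTICAL_BRACKET expects alignment with the character after the bracket.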
+ self.bracket_indentation = self.indentation + self.parent = parent + + +class ImplicitNode(BracketNode): + """ + Implicit indentation after keyword arguments, default arguments, + annotations and dict values. + """ + def __init__(self, config, leaf, parent): + super(ImplicitNode, self).__init__(config, leaf, parent) + self.type = IndentationTypes.IMPLICIT + + next_leaf = leaf.get_next_leaf() + if leaf == ':' and '\n' not in next_leaf.prefix: + self.indentation += ' ' + + +class BackslashNode(IndentationNode): + type = IndentationTypes.BACKSLASH + + def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None): + expr_stmt = search_ancestor(containing_leaf, 'expr_stmt') + if expr_stmt is not None: + equals = expr_stmt.children[-2] + + if '\t' in config.indentation: + # TODO unite with the code of BracketNode + self.indentation = None + else: + # If the backslash follows the equals, use normal indentation + # otherwise it should align with the equals. + if equals.end_pos == spacing.start_pos: + self.indentation = parent_indentation + config.indentation + else: + # +1 because there is a space. + self.indentation = ' ' * (equals.end_pos[1] + 1) + else: + self.indentation = parent_indentation + config.indentation + self.bracket_indentation = self.indentation + self.parent = parent + + +def _is_magic_name(name): + return name.value.startswith('__') and name.value.endswith('__') + + +class PEP8Normalizer(ErrorFinder): + def __init__(self, *args, **kwargs): + super(PEP8Normalizer, self).__init__(*args, **kwargs) + self._previous_part = None + self._previous_leaf = None + self._on_newline = True + self._newline_count = 0 + self._wanted_newline_count = None + self._max_new_lines_in_prefix = 0 + self._new_statement = True + self._implicit_indentation_possible = False + # The top of stack of the indentation nodes. + self._indentation_tos = self._last_indentation_tos = \ + IndentationNode(self._config, indentation='') + self._in_suite_introducer = False + + if ' ' in self._config.indentation: + self._indentation_type = 'spaces' + self._wrong_indentation_char = '\t' + else: + self._indentation_type = 'tabs' + self._wrong_indentation_char = ' ' + + @contextmanager + def visit_node(self, node): + with super(PEP8Normalizer, self).visit_node(node): + with self._visit_node(node): + yield + + @contextmanager + def _visit_node(self, node): + typ = node.type + + if typ in 'import_name': + names = node.get_defined_names() + if len(names) > 1: + for name in names[:1]: + self.add_issue(name, 401, 'Multiple imports on one line') + elif typ == 'lambdef': + expr_stmt = node.parent + # Check if it's simply defining a single name, not something like + # foo.bar or x[1], where using a lambda could make more sense. + if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' for n in expr_stmt.children[:-2:2]): + self.add_issue(node, 731, 'Do not assign a lambda expression, use a def') + elif typ == 'try_stmt': + for child in node.children: + # Here we can simply check if it's an except, because otherwise + # it would be an except_clause. 
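# Illustrative sketch (assumption-heavy, not from the patched file): running the
# PEP8Normalizer defined above.  _get_normalizer_issues is a private helper on
# parso's Grammar (present in the vendored grammar.py); treat its use here as an
# assumption rather than a documented API.
import parso
from parso.python.pep8 import PEP8NormalizerConfig

code = (
    "import os, sys\n"     # 401: multiple imports on one line
    "try:\n"
    "    pass\n"
    "except:\n"            # 722: do not use bare except
    "    pass\n"
)
grammar = parso.load_grammar(version='3.6')
module = grammar.parse(code)
for issue in grammar._get_normalizer_issues(module, PEP8NormalizerConfig()):
    print(issue.code, issue.start_pos, issue.message)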
+ if child.type == 'keyword' and child.value == 'except': + self.add_issue(child, 722, 'Do not use bare except, specify exception instead') + elif typ == 'comparison': + for child in node.children: + if child.type not in ('atom_expr', 'power'): + continue + if len(child.children) > 2: + continue + trailer = child.children[1] + atom = child.children[0] + if trailer.type == 'trailer' and atom.type == 'name' \ + and atom.value == 'type': + self.add_issue(node, 721, "Do not compare types, use 'isinstance()") + break + elif typ == 'file_input': + endmarker = node.children[-1] + prev = endmarker.get_previous_leaf() + prefix = endmarker.prefix + if (not prefix.endswith('\n') and ( + prefix or prev is None or prev.value != '\n')): + self.add_issue(endmarker, 292, "No newline at end of file") + + if typ in _IMPORT_TYPES: + simple_stmt = node.parent + module = simple_stmt.parent + #if module.type == 'simple_stmt': + if module.type == 'file_input': + index = module.children.index(simple_stmt) + for child in module.children[:index]: + children = [child] + if child.type == 'simple_stmt': + # Remove the newline. + children = child.children[:-1] + + found_docstring = False + for c in children: + if c.type == 'string' and not found_docstring: + continue + found_docstring = True + + if c.type == 'expr_stmt' and \ + all(_is_magic_name(n) for n in c.get_defined_names()): + continue + + if c.type in _IMPORT_TYPES or isinstance(c, Flow): + continue + + self.add_issue(node, 402, 'Module level import not at top of file') + break + else: + continue + break + + implicit_indentation_possible = typ in _IMPLICIT_INDENTATION_TYPES + in_introducer = typ in _SUITE_INTRODUCERS + if in_introducer: + self._in_suite_introducer = True + elif typ == 'suite': + if self._indentation_tos.type == IndentationTypes.BACKSLASH: + self._indentation_tos = self._indentation_tos.parent + + self._indentation_tos = IndentationNode( + self._config, + self._indentation_tos.indentation + self._config.indentation, + parent=self._indentation_tos + ) + elif implicit_indentation_possible: + self._implicit_indentation_possible = True + yield + if typ == 'suite': + assert self._indentation_tos.type == IndentationTypes.SUITE + self._indentation_tos = self._indentation_tos.parent + # If we dedent, no lines are needed anymore. + self._wanted_newline_count = None + elif implicit_indentation_possible: + self._implicit_indentation_possible = False + if self._indentation_tos.type == IndentationTypes.IMPLICIT: + self._indentation_tos = self._indentation_tos.parent + elif in_introducer: + self._in_suite_introducer = False + if typ in ('classdef', 'funcdef'): + self._wanted_newline_count = self._get_wanted_blank_lines_count() + + def _check_tabs_spaces(self, spacing): + if self._wrong_indentation_char in spacing.value: + self.add_issue(spacing, 101, 'Indentation contains ' + self._indentation_type) + return True + return False + + def _get_wanted_blank_lines_count(self): + suite_node = self._indentation_tos.get_latest_suite_node() + return int(suite_node.parent is None) + 1 + + def _reset_newlines(self, spacing, leaf, is_comment=False): + self._max_new_lines_in_prefix = \ + max(self._max_new_lines_in_prefix, self._newline_count) + + wanted = self._wanted_newline_count + if wanted is not None: + # Need to substract one + blank_lines = self._newline_count - 1 + if wanted > blank_lines and leaf.type != 'endmarker': + # In case of a comment we don't need to add the issue, yet. + if not is_comment: + # TODO end_pos wrong. 
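# Illustrative sketch (not from the patched file): the blank-line budget used by
# _reset_newlines above, restated as a tiny standalone function.
def wanted_blank_lines(suite_is_module_level):
    # Mirrors _get_wanted_blank_lines_count(): int(parent is None) + 1, i.e.
    # two blank lines before top-level defs/classes (302), one when nested (301).
    return int(suite_is_module_level) + 1

assert wanted_blank_lines(True) == 2
assert wanted_blank_lines(False) == 1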
+ code = 302 if wanted == 2 else 301 + message = "expected %s blank line, found %s" \ + % (wanted, blank_lines) + self.add_issue(spacing, code, message) + self._wanted_newline_count = None + else: + self._wanted_newline_count = None + + if not is_comment: + wanted = self._get_wanted_blank_lines_count() + actual = self._max_new_lines_in_prefix - 1 + + val = leaf.value + needs_lines = ( + val == '@' and leaf.parent.type == 'decorator' + or ( + val == 'class' + or val == 'async' and leaf.get_next_leaf() == 'def' + or val == 'def' and self._previous_leaf != 'async' + ) and leaf.parent.parent.type != 'decorated' + ) + if needs_lines and actual < wanted: + func_or_cls = leaf.parent + suite = func_or_cls.parent + if suite.type == 'decorated': + suite = suite.parent + + # The first leaf of a file or a suite should not need blank + # lines. + if suite.children[int(suite.type == 'suite')] != func_or_cls: + code = 302 if wanted == 2 else 301 + message = "expected %s blank line, found %s" \ + % (wanted, actual) + self.add_issue(spacing, code, message) + + self._max_new_lines_in_prefix = 0 + + self._newline_count = 0 + + def visit_leaf(self, leaf): + super(PEP8Normalizer, self).visit_leaf(leaf) + for part in leaf._split_prefix(): + if part.type == 'spacing': + # This part is used for the part call after for. + break + self._visit_part(part, part.create_spacing_part(), leaf) + + self._analyse_non_prefix(leaf) + self._visit_part(leaf, part, leaf) + + # Cleanup + self._last_indentation_tos = self._indentation_tos + + self._new_statement = leaf.type == 'newline' + + # TODO does this work? with brackets and stuff? + if leaf.type == 'newline' and \ + self._indentation_tos.type == IndentationTypes.BACKSLASH: + self._indentation_tos = self._indentation_tos.parent + + if leaf.value == ':' and leaf.parent.type in _SUITE_INTRODUCERS: + self._in_suite_introducer = False + elif leaf.value == 'elif': + self._in_suite_introducer = True + + if not self._new_statement: + self._reset_newlines(part, leaf) + self._max_blank_lines = 0 + + self._previous_leaf = leaf + + return leaf.value + + def _visit_part(self, part, spacing, leaf): + value = part.value + type_ = part.type + if type_ == 'error_leaf': + return + + if value == ',' and part.parent.type == 'dictorsetmaker': + self._indentation_tos = self._indentation_tos.parent + + node = self._indentation_tos + + if type_ == 'comment': + if value.startswith('##'): + # Whole blocks of # should not raise an error. + if value.lstrip('#'): + self.add_issue(part, 266, "Too many leading '#' for block comment.") + elif self._on_newline: + if not re.match('#:? ', value) and not value == '#' \ + and not (value.startswith('#!') and part.start_pos == (1, 0)): + self.add_issue(part, 265, "Block comment should start with '# '") + else: + if not re.match('#:? [^ ]', value): + self.add_issue(part, 262, "Inline comment should start with '# '") + + self._reset_newlines(spacing, leaf, is_comment=True) + elif type_ == 'newline': + if self._newline_count > self._get_wanted_blank_lines_count(): + self.add_issue(part, 303, "Too many blank lines (%s)" % self._newline_count) + elif leaf in ('def', 'class') \ + and leaf.parent.parent.type == 'decorated': + self.add_issue(part, 304, "Blank lines found after function decorator") + + + self._newline_count += 1 + + if type_ == 'backslash': + # TODO is this enough checking? What about ==? 
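# Illustrative sketch (not from the patched file): the comment-shape rules
# checked above (codes 262/265/266), condensed into a standalone helper that
# deliberately ignores the shebang special case.
import re

def block_comment_ok(value):
    # '##...' is only acceptable when nothing but hashes follow (266 otherwise);
    # other block comments must look like '# ...' or '#: ...' (265).
    if value.startswith('##'):
        return not value.lstrip('#')
    return value == '#' or bool(re.match('#:? ', value))

assert block_comment_ok('# fine')
assert block_comment_ok('#: also fine')
assert not block_comment_ok('#missing space')
assert not block_comment_ok('## block with text')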
+ if node.type != IndentationTypes.BACKSLASH: + if node.type != IndentationTypes.SUITE: + self.add_issue(part, 502, 'The backslash is redundant between brackets') + else: + indentation = node.indentation + if self._in_suite_introducer and node.type == IndentationTypes.SUITE: + indentation += self._config.indentation + + self._indentation_tos = BackslashNode( + self._config, + indentation, + part, + spacing, + parent=self._indentation_tos + ) + elif self._on_newline: + indentation = spacing.value + if node.type == IndentationTypes.BACKSLASH \ + and self._previous_part.type == 'newline': + self._indentation_tos = self._indentation_tos.parent + + if not self._check_tabs_spaces(spacing): + should_be_indentation = node.indentation + if type_ == 'comment': + # Comments can be dedented. So we have to care for that. + n = self._last_indentation_tos + while True: + if len(indentation) > len(n.indentation): + break + + should_be_indentation = n.indentation + + self._last_indentation_tos = n + if n == node: + break + n = n.parent + + if self._new_statement: + if type_ == 'newline': + if indentation: + self.add_issue(spacing, 291, 'Trailing whitespace') + elif indentation != should_be_indentation: + s = '%s %s' % (len(self._config.indentation), self._indentation_type) + self.add_issue(part, 111, 'Indentation is not a multiple of ' + s) + else: + if value in '])}': + should_be_indentation = node.bracket_indentation + else: + should_be_indentation = node.indentation + if self._in_suite_introducer and indentation == \ + node.get_latest_suite_node().indentation \ + + self._config.indentation: + self.add_issue(part, 129, "Line with same indent as next logical block") + elif indentation != should_be_indentation: + if not self._check_tabs_spaces(spacing) and part.value != '\n': + if value in '])}': + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue(part, 124, "Closing bracket does not match visual indentation") + else: + self.add_issue(part, 123, "Losing bracket does not match indentation of opening bracket's line") + else: + if len(indentation) < len(should_be_indentation): + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue(part, 128, 'Continuation line under-indented for visual indent') + elif node.type == IndentationTypes.BACKSLASH: + self.add_issue(part, 122, 'Continuation line missing indentation or outdented') + elif node.type == IndentationTypes.IMPLICIT: + self.add_issue(part, 135, 'xxx') + else: + self.add_issue(part, 121, 'Continuation line under-indented for hanging indent') + else: + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue(part, 127, 'Continuation line over-indented for visual indent') + elif node.type == IndentationTypes.IMPLICIT: + self.add_issue(part, 136, 'xxx') + else: + self.add_issue(part, 126, 'Continuation line over-indented for hanging indent') + else: + self._check_spacing(part, spacing) + + self._check_line_length(part, spacing) + # ------------------------------- + # Finalizing. Updating the state. 
+ # ------------------------------- + if value and value in '()[]{}' and type_ != 'error_leaf' \ + and part.parent.type != 'error_node': + if value in _OPENING_BRACKETS: + self._indentation_tos = BracketNode( + self._config, part, + parent=self._indentation_tos, + in_suite_introducer=self._in_suite_introducer + ) + else: + assert node.type != IndentationTypes.IMPLICIT + self._indentation_tos = self._indentation_tos.parent + elif value in ('=', ':') and self._implicit_indentation_possible \ + and part.parent.type in _IMPLICIT_INDENTATION_TYPES: + indentation = node.indentation + self._indentation_tos = ImplicitNode( + self._config, part, parent=self._indentation_tos + ) + + self._on_newline = type_ in ('newline', 'backslash', 'bom') + + self._previous_part = part + self._previous_spacing = spacing + + def _check_line_length(self, part, spacing): + if part.type == 'backslash': + last_column = part.start_pos[1] + 1 + else: + last_column = part.end_pos[1] + if last_column > self._config.max_characters \ + and spacing.start_pos[1] <= self._config.max_characters : + # Special case for long URLs in multi-line docstrings or comments, + # but still report the error when the 72 first chars are whitespaces. + report = True + if part.type == 'comment': + splitted = part.value[1:].split() + if len(splitted) == 1 \ + and (part.end_pos[1] - len(splitted[0])) < 72: + report = False + if report: + self.add_issue( + part, + 501, + 'Line too long (%s > %s characters)' % + (last_column, self._config.max_characters), + ) + + def _check_spacing(self, part, spacing): + def add_if_spaces(*args): + if spaces: + return self.add_issue(*args) + + def add_not_spaces(*args): + if not spaces: + return self.add_issue(*args) + + spaces = spacing.value + prev = self._previous_part + if prev is not None and prev.type == 'error_leaf' or part.type == 'error_leaf': + return + + type_ = part.type + if '\t' in spaces: + self.add_issue(spacing, 223, 'Used tab to separate tokens') + elif type_ == 'comment': + if len(spaces) < self._config.spaces_before_comment: + self.add_issue(spacing, 261, 'At least two spaces before inline comment') + elif type_ == 'newline': + add_if_spaces(spacing, 291, 'Trailing whitespace') + elif len(spaces) > 1: + self.add_issue(spacing, 221, 'Multiple spaces used') + else: + if prev in _OPENING_BRACKETS: + message = "Whitespace after '%s'" % part.value + add_if_spaces(spacing, 201, message) + elif part in _CLOSING_BRACKETS: + message = "Whitespace before '%s'" % part.value + add_if_spaces(spacing, 202, message) + elif part in (',', ';') or part == ':' \ + and part.parent.type not in _POSSIBLE_SLICE_PARENTS: + message = "Whitespace before '%s'" % part.value + add_if_spaces(spacing, 203, message) + elif prev == ':' and prev.parent.type in _POSSIBLE_SLICE_PARENTS: + pass # TODO + elif prev in (',', ';', ':'): + add_not_spaces(spacing, 231, "missing whitespace after '%s'") + elif part == ':': # Is a subscript + # TODO + pass + elif part in ('*', '**') and part.parent.type not in _NON_STAR_TYPES \ + or prev in ('*', '**') \ + and prev.parent.type not in _NON_STAR_TYPES: + # TODO + pass + elif prev in _FACTOR and prev.parent.type == 'factor': + pass + elif prev == '@' and prev.parent.type == 'decorator': + pass # TODO should probably raise an error if there's a space here + elif part in _NEEDS_SPACE or prev in _NEEDS_SPACE: + if part == '=' and part.parent.type in ('argument', 'param') \ + or prev == '=' and prev.parent.type in ('argument', 'param'): + if part == '=': + param = part.parent + else: + param = 
prev.parent + if param.type == 'param' and param.annotation: + add_not_spaces(spacing, 252, 'Expected spaces around annotation equals') + else: + add_if_spaces(spacing, 251, 'Unexpected spaces around keyword / parameter equals') + elif part in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR: + add_not_spaces(spacing, 227, 'Missing whitespace around bitwise or shift operator') + elif part == '%' or prev == '%': + add_not_spaces(spacing, 228, 'Missing whitespace around modulo operator') + else: + message_225 = 'Missing whitespace between tokens' + add_not_spaces(spacing, 225, message_225) + elif type_ == 'keyword' or prev.type == 'keyword': + add_not_spaces(spacing, 275, 'Missing whitespace around keyword') + else: + prev_spacing = self._previous_spacing + if prev in _ALLOW_SPACE and spaces != prev_spacing.value \ + and '\n' not in self._previous_leaf.prefix: + message = "Whitespace before operator doesn't match with whitespace after" + self.add_issue(spacing, 229, message) + + if spaces and part not in _ALLOW_SPACE and prev not in _ALLOW_SPACE: + message_225 = 'Missing whitespace between tokens' + #print('xy', spacing) + #self.add_issue(spacing, 225, message_225) + # TODO why only brackets? + if part in _OPENING_BRACKETS: + message = "Whitespace before '%s'" % part.value + add_if_spaces(spacing, 211, message) + + def _analyse_non_prefix(self, leaf): + typ = leaf.type + if typ == 'name' and leaf.value in ('l', 'O', 'I'): + if leaf.is_definition(): + message = "Do not define %s named 'l', 'O', or 'I' one line" + if leaf.parent.type == 'class' and leaf.parent.name == leaf: + self.add_issue(leaf, 742, message % 'classes') + elif leaf.parent.type == 'function' and leaf.parent.name == leaf: + self.add_issue(leaf, 743, message % 'function') + else: + self.add_issuadd_issue(741, message % 'variables', leaf) + elif leaf.value == ':': + if isinstance(leaf.parent, (Flow, Scope)) and leaf.parent.type != 'lambdef': + next_leaf = leaf.get_next_leaf() + if next_leaf.type != 'newline': + if leaf.parent.type == 'funcdef': + self.add_issue(next_leaf, 704, 'Multiple statements on one line (def)') + else: + self.add_issue(next_leaf, 701, 'Multiple statements on one line (colon)') + elif leaf.value == ';': + if leaf.get_next_leaf().type in ('newline', 'endmarker'): + self.add_issue(leaf, 703, 'Statement ends with a semicolon') + else: + self.add_issue(leaf, 702, 'Multiple statements on one line (semicolon)') + elif leaf.value in ('==', '!='): + comparison = leaf.parent + index = comparison.children.index(leaf) + left = comparison.children[index - 1] + right = comparison.children[index + 1] + for node in left, right: + if node.type == 'keyword' or node.type == 'name': + if node.value == 'None': + message = "comparison to None should be 'if cond is None:'" + self.add_issue(leaf, 711, message) + break + elif node.value in ('True', 'False'): + message = "comparison to False/True should be 'if cond is True:' or 'if cond:'" + self.add_issue(leaf, 712, message) + break + elif leaf.value in ('in', 'is'): + comparison = leaf.parent + if comparison.type == 'comparison' and comparison.parent.type == 'not_test': + if leaf.value == 'in': + self.add_issue(leaf, 713, "test for membership should be 'not in'") + else: + self.add_issue(leaf, 714, "test for object identity should be 'is not'") + elif typ == 'string': + # Checking multiline strings + for i, line in enumerate(leaf.value.splitlines()[1:]): + indentation = re.match('[ \t]*', line).group(0) + start_pos = leaf.line + i, len(indentation) + # TODO check multiline 
indentation. + elif typ == 'endmarker': + if self._newline_count >= 2: + self.add_issue(leaf, 391, 'Blank line at end of file') + + def add_issue(self, node, code, message): + if self._previous_leaf is not None: + if search_ancestor(self._previous_leaf, 'error_node') is not None: + return + if self._previous_leaf.type == 'error_leaf': + return + if search_ancestor(node, 'error_node') is not None: + return + if code in (901, 903): + # 901 and 903 are raised by the ErrorFinder. + super(PEP8Normalizer, self).add_issue(node, code, message) + else: + # Skip ErrorFinder here, because it has custom behavior. + super(ErrorFinder, self).add_issue(node, code, message) + + +class PEP8NormalizerConfig(ErrorFinderConfig): + normalizer_class = PEP8Normalizer + """ + Normalizing to PEP8. Not really implemented, yet. + """ + def __init__(self, indentation=' ' * 4, hanging_indentation=None, + max_characters=79, spaces_before_comment=2): + self.indentation = indentation + if hanging_indentation is None: + hanging_indentation = indentation + self.hanging_indentation = hanging_indentation + self.closing_bracket_hanging_indentation = '' + self.break_after_binary = False + self.max_characters = max_characters + self.spaces_before_comment = spaces_before_comment + + +# TODO this is not yet ready. +#@PEP8Normalizer.register_rule(type='endmarker') +class BlankLineAtEnd(Rule): + code = 392 + message = 'Blank line at end of file' + + def is_issue(self, leaf): + return self._newline_count >= 2 diff --git a/pythonFiles/parso/python/prefix.py b/pythonFiles/parso/python/prefix.py new file mode 100644 index 000000000000..b7f1e1bc4db9 --- /dev/null +++ b/pythonFiles/parso/python/prefix.py @@ -0,0 +1,97 @@ +import re +from codecs import BOM_UTF8 + +from parso.python.tokenize import group + +unicode_bom = BOM_UTF8.decode('utf-8') + + +class PrefixPart(object): + def __init__(self, leaf, typ, value, spacing='', start_pos=None): + assert start_pos is not None + self.parent = leaf + self.type = typ + self.value = value + self.spacing = spacing + self.start_pos = start_pos + + @property + def end_pos(self): + if self.value.endswith('\n'): + return self.start_pos[0] + 1, 0 + if self.value == unicode_bom: + # The bom doesn't have a length at the start of a Python file. 
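# Illustrative sketch (not from the patched file): how the PrefixPart objects
# above surface through the tree API.  _split_prefix is a private helper on
# PythonLeaf (see tree.py in this patch) that calls split_prefix further down
# in this file.
import parso

module = parso.parse('# a comment\n\nx = 1\n', version='3.6')
leaf = module.children[0].get_first_leaf()      # the Name 'x'
print(repr(leaf.prefix))                        # '# a comment\n\n'
for part in leaf._split_prefix():
    print(part.type, repr(part.value), part.start_pos)
# roughly: comment '# a comment', newline '\n', newline '\n', spacing ''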
+ return self.start_pos + return self.start_pos[0], self.start_pos[1] + len(self.value) + + def create_spacing_part(self): + column = self.start_pos[1] - len(self.spacing) + return PrefixPart( + self.parent, 'spacing', self.spacing, + start_pos=(self.start_pos[0], column) + ) + + def __repr__(self): + return '%s(%s, %s, %s)' % ( + self.__class__.__name__, + self.type, + repr(self.value), + self.start_pos + ) + + +_comment = r'#[^\n\r\f]*' +_backslash = r'\\\r?\n' +_newline = r'\r?\n' +_form_feed = r'\f' +_only_spacing = '$' +_spacing = r'[ \t]*' +_bom = unicode_bom + +_regex = group( + _comment, _backslash, _newline, _form_feed, _only_spacing, _bom, + capture=True +) +_regex = re.compile(group(_spacing, capture=True) + _regex) + + +_types = { + '#': 'comment', + '\\': 'backslash', + '\f': 'formfeed', + '\n': 'newline', + '\r': 'newline', + unicode_bom: 'bom' +} + + +def split_prefix(leaf, start_pos): + line, column = start_pos + start = 0 + value = spacing = '' + bom = False + while start != len(leaf.prefix): + match =_regex.match(leaf.prefix, start) + spacing = match.group(1) + value = match.group(2) + if not value: + break + type_ = _types[value[0]] + yield PrefixPart( + leaf, type_, value, spacing, + start_pos=(line, column + start - int(bom) + len(spacing)) + ) + if type_ == 'bom': + bom = True + + start = match.end(0) + if value.endswith('\n'): + line += 1 + column = -start + + if value: + spacing = '' + yield PrefixPart( + leaf, 'spacing', spacing, + start_pos=(line, column + start) + ) diff --git a/pythonFiles/parso/python/token.py b/pythonFiles/parso/python/token.py new file mode 100644 index 000000000000..dd849b01daa7 --- /dev/null +++ b/pythonFiles/parso/python/token.py @@ -0,0 +1,113 @@ +from __future__ import absolute_import +from itertools import count +from token import * + +from parso._compatibility import py_version + + +_counter = count(N_TOKENS) +# Never want to see this thing again. +del N_TOKENS + +COMMENT = next(_counter) +tok_name[COMMENT] = 'COMMENT' + +NL = next(_counter) +tok_name[NL] = 'NL' + +# Sets the attributes that don't exist in these tok_name versions. +if py_version >= 30: + BACKQUOTE = next(_counter) + tok_name[BACKQUOTE] = 'BACKQUOTE' +else: + RARROW = next(_counter) + tok_name[RARROW] = 'RARROW' + ELLIPSIS = next(_counter) + tok_name[ELLIPSIS] = 'ELLIPSIS' + +if py_version < 35: + ATEQUAL = next(_counter) + tok_name[ATEQUAL] = 'ATEQUAL' + +ERROR_DEDENT = next(_counter) +tok_name[ERROR_DEDENT] = 'ERROR_DEDENT' + +FSTRING_START = next(_counter) +tok_name[FSTRING_START] = 'FSTRING_START' +FSTRING_END = next(_counter) +tok_name[FSTRING_END] = 'FSTRING_END' +FSTRING_STRING = next(_counter) +tok_name[FSTRING_STRING] = 'FSTRING_STRING' +EXCLAMATION = next(_counter) +tok_name[EXCLAMATION] = 'EXCLAMATION' + +# Map from operator to number (since tokenize doesn't do this) + +opmap_raw = """\ +( LPAR +) RPAR +[ LSQB +] RSQB +: COLON +, COMMA +; SEMI ++ PLUS +- MINUS +* STAR +/ SLASH +| VBAR +& AMPER +< LESS +> GREATER += EQUAL +. DOT +% PERCENT +` BACKQUOTE +{ LBRACE +} RBRACE +@ AT +== EQEQUAL +!= NOTEQUAL +<> NOTEQUAL +<= LESSEQUAL +>= GREATEREQUAL +~ TILDE +^ CIRCUMFLEX +<< LEFTSHIFT +>> RIGHTSHIFT +** DOUBLESTAR ++= PLUSEQUAL +-= MINEQUAL +*= STAREQUAL +/= SLASHEQUAL +%= PERCENTEQUAL +&= AMPEREQUAL +|= VBAREQUAL +@= ATEQUAL +^= CIRCUMFLEXEQUAL +<<= LEFTSHIFTEQUAL +>>= RIGHTSHIFTEQUAL +**= DOUBLESTAREQUAL +// DOUBLESLASH +//= DOUBLESLASHEQUAL +-> RARROW +... ELLIPSIS +! 
EXCLAMATION +""" + +opmap = {} +for line in opmap_raw.splitlines(): + op, name = line.split() + opmap[op] = globals()[name] + + +def generate_token_id(string): + """ + Uses a token in the grammar (e.g. `'+'` or `'and'`returns the corresponding + ID for it. The strings are part of the grammar file. + """ + try: + return opmap[string] + except KeyError: + pass + return globals()[string] diff --git a/pythonFiles/parso/python/tokenize.py b/pythonFiles/parso/python/tokenize.py new file mode 100644 index 000000000000..0ac8a8d5275f --- /dev/null +++ b/pythonFiles/parso/python/tokenize.py @@ -0,0 +1,609 @@ +# -*- coding: utf-8 -*- +""" +This tokenizer has been copied from the ``tokenize.py`` standard library +tokenizer. The reason was simple: The standard library tokenizer fails +if the indentation is not right. To make it possible to do error recovery the + tokenizer needed to be rewritten. + +Basically this is a stripped down version of the standard library module, so +you can read the documentation there. Additionally we included some speed and +memory optimizations here. +""" +from __future__ import absolute_import + +import sys +import string +import re +from collections import namedtuple +import itertools as _itertools +from codecs import BOM_UTF8 + +from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap, + NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT, + ERROR_DEDENT, FSTRING_STRING, FSTRING_START, + FSTRING_END) +from parso._compatibility import py_version +from parso.utils import split_lines + + +TokenCollection = namedtuple( + 'TokenCollection', + 'pseudo_token single_quoted triple_quoted endpats whitespace ' + 'fstring_pattern_map always_break_tokens', +) + +BOM_UTF8_STRING = BOM_UTF8.decode('utf-8') + +_token_collection_cache = {} + +if py_version >= 30: + # Python 3 has str.isidentifier() to check if a char is a valid identifier + is_identifier = str.isidentifier +else: + namechars = string.ascii_letters + '_' + is_identifier = lambda s: s in namechars + + +def group(*choices, **kwargs): + capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :( + assert not kwargs + + start = '(' + if not capture: + start += '?:' + return start + '|'.join(choices) + ')' + + +def maybe(*choices): + return group(*choices) + '?' + + +# Return the empty string, plus all of the valid string prefixes. +def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False): + def different_case_versions(prefix): + for s in _itertools.product(*[(c, c.upper()) for c in prefix]): + yield ''.join(s) + # The valid string prefixes. Only contain the lower case versions, + # and don't contain any permuations (include 'fr', but not + # 'rf'). The various permutations will be generated. + valid_string_prefixes = ['b', 'r', 'u'] + if version_info >= (3, 0): + valid_string_prefixes.append('br') + + result = set(['']) + if version_info >= (3, 6) and include_fstring: + f = ['f', 'fr'] + if only_fstring: + valid_string_prefixes = f + result = set() + else: + valid_string_prefixes += f + elif only_fstring: + return set() + + # if we add binary f-strings, add: ['fb', 'fbr'] + for prefix in valid_string_prefixes: + for t in _itertools.permutations(prefix): + # create a list with upper and lower versions of each + # character + result.update(different_case_versions(t)) + if version_info <= (2, 7): + # In Python 2 the order cannot just be random. 
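# Illustrative sketch (not from the patched file): how the operator map and
# generate_token_id() above resolve grammar terminals to token IDs.
from parso.python import token

assert token.generate_token_id('+') == token.PLUS              # via opmap
assert token.generate_token_id('**=') == token.DOUBLESTAREQUAL
assert token.generate_token_id('NAME') == token.NAME           # via module globals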
+ result.update(different_case_versions('ur')) + result.update(different_case_versions('br')) + return result + + +def _compile(expr): + return re.compile(expr, re.UNICODE) + + +def _get_token_collection(version_info): + try: + return _token_collection_cache[tuple(version_info)] + except KeyError: + _token_collection_cache[tuple(version_info)] = result = \ + _create_token_collection(version_info) + return result + + +fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+') +fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+') + + +def _create_token_collection(version_info): + # Note: we use unicode matching for names ("\w") but ascii matching for + # number literals. + Whitespace = r'[ \f\t]*' + whitespace = _compile(Whitespace) + Comment = r'#[^\r\n]*' + Name = r'\w+' + + if version_info >= (3, 6): + Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' + Binnumber = r'0[bB](?:_?[01])+' + Octnumber = r'0[oO](?:_?[0-7])+' + Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' + Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) + Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' + Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', + r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) + Expfloat = r'[0-9](?:_?[0-9])*' + Exponent + Floatnumber = group(Pointfloat, Expfloat) + Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') + else: + Hexnumber = r'0[xX][0-9a-fA-F]+' + Binnumber = r'0[bB][01]+' + if version_info >= (3, 0): + Octnumber = r'0[oO][0-7]+' + else: + Octnumber = '0[oO]?[0-7]+' + Decnumber = r'(?:0+|[1-9][0-9]*)' + Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) + Exponent = r'[eE][-+]?[0-9]+' + Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) + Expfloat = r'[0-9]+' + Exponent + Floatnumber = group(Pointfloat, Expfloat) + Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') + Number = group(Imagnumber, Floatnumber, Intnumber) + + # Note that since _all_string_prefixes includes the empty string, + # StringPrefix can be the empty string (making it optional). + possible_prefixes = _all_string_prefixes(version_info) + StringPrefix = group(*possible_prefixes) + StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True)) + fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True) + FStringStart = group(*fstring_prefixes) + + # Tail end of ' string. + Single = r"[^'\\]*(?:\\.[^'\\]*)*'" + # Tail end of " string. + Double = r'[^"\\]*(?:\\.[^"\\]*)*"' + # Tail end of ''' string. + Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" + # Tail end of """ string. + Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' + Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""') + + # Because of leftmost-then-longest match semantics, be sure to put the + # longest operators first (e.g., if = came before ==, == would get + # recognized as two instances of =). + Operator = group(r"\*\*=?", r">>=?", r"<<=?", + r"//=?", r"->", + r"[+\-*/%&@`|^!=<>]=?", + r"~") + + Bracket = '[][(){}]' + + special_args = [r'\r?\n', r'[:;.,@]'] + if version_info >= (3, 0): + special_args.insert(0, r'\.\.\.') + Special = group(*special_args) + + Funny = group(Operator, Bracket, Special) + + # First (or only) line of ' or " string. 
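# Illustrative sketch (not from the patched file): what the prefix generation
# above produces; _all_string_prefixes is module-internal.
from parso.python.tokenize import _all_string_prefixes

prefixes = _all_string_prefixes((3, 6))
assert '' in prefixes                           # makes the string prefix optional
assert {'rb', 'Rb', 'bR', 'BR'} <= prefixes     # permutations and case variants of 'br'

fstring_only = _all_string_prefixes((3, 6), include_fstring=True, only_fstring=True)
assert {'f', 'F', 'fr', 'Rf'} <= fstring_only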
+ ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + + group("'", r'\\\r?\n'), + StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + + group('"', r'\\\r?\n')) + pseudo_extra_pool = [Comment, Triple] + all_quotes = '"', "'", '"""', "'''" + if fstring_prefixes: + pseudo_extra_pool.append(FStringStart + group(*all_quotes)) + + PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool) + PseudoToken = group(Whitespace, capture=True) + \ + group(PseudoExtras, Number, Funny, ContStr, Name, capture=True) + + # For a given string prefix plus quotes, endpats maps it to a regex + # to match the remainder of that string. _prefix can be empty, for + # a normal single or triple quoted string (with no prefix). + endpats = {} + for _prefix in possible_prefixes: + endpats[_prefix + "'"] = _compile(Single) + endpats[_prefix + '"'] = _compile(Double) + endpats[_prefix + "'''"] = _compile(Single3) + endpats[_prefix + '"""'] = _compile(Double3) + + # A set of all of the single and triple quoted string prefixes, + # including the opening quotes. + single_quoted = set() + triple_quoted = set() + fstring_pattern_map = {} + for t in possible_prefixes: + for quote in '"', "'": + single_quoted.add(t + quote) + + for quote in '"""', "'''": + triple_quoted.add(t + quote) + + for t in fstring_prefixes: + for quote in all_quotes: + fstring_pattern_map[t + quote] = quote + + ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except', + 'finally', 'while', 'with', 'return') + pseudo_token_compiled = _compile(PseudoToken) + return TokenCollection( + pseudo_token_compiled, single_quoted, triple_quoted, endpats, + whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS + ) + + +class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])): + @property + def end_pos(self): + lines = split_lines(self.string) + if len(lines) > 1: + return self.start_pos[0] + len(lines) - 1, 0 + else: + return self.start_pos[0], self.start_pos[1] + len(self.string) + + +class PythonToken(Token): + def _get_type_name(self, exact=True): + return tok_name[self.type] + + def __repr__(self): + return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' % + self._replace(type=self._get_type_name())) + + +class FStringNode(object): + def __init__(self, quote): + self.quote = quote + self.parentheses_count = 0 + self.previous_lines = '' + self.last_string_start_pos = None + # In the syntax there can be multiple format_spec's nested: + # {x:{y:3}} + self.format_spec_count = 0 + + def open_parentheses(self, character): + self.parentheses_count += 1 + + def close_parentheses(self, character): + self.parentheses_count -= 1 + + def allow_multiline(self): + return len(self.quote) == 3 + + def is_in_expr(self): + return (self.parentheses_count - self.format_spec_count) > 0 + + +def _check_fstring_ending(fstring_stack, token, from_start=False): + fstring_end = float('inf') + fstring_index = None + for i, node in enumerate(fstring_stack): + if from_start: + if token.startswith(node.quote): + fstring_index = i + fstring_end = len(node.quote) + else: + continue + else: + try: + end = token.index(node.quote) + except ValueError: + pass + else: + if fstring_index is None or end < fstring_end: + fstring_index = i + fstring_end = end + return fstring_index, fstring_end + + +def _find_fstring_string(fstring_stack, line, lnum, pos): + tos = fstring_stack[-1] + if tos.is_in_expr(): + return '', pos + else: + new_pos = pos + allow_multiline = tos.allow_multiline() + if allow_multiline: + match = fstring_string_multi_line.match(line, pos) 
+ else: + match = fstring_string_single_line.match(line, pos) + if match is None: + string = tos.previous_lines + else: + if not tos.previous_lines: + tos.last_string_start_pos = (lnum, pos) + + string = match.group(0) + for fstring_stack_node in fstring_stack: + try: + string = string[:string.index(fstring_stack_node.quote)] + except ValueError: + pass # The string was not found. + + new_pos += len(string) + if allow_multiline and string.endswith('\n'): + tos.previous_lines += string + string = '' + else: + string = tos.previous_lines + string + + return string, new_pos + + +def tokenize(code, version_info, start_pos=(1, 0)): + """Generate tokens from a the source code (string).""" + lines = split_lines(code, keepends=True) + return tokenize_lines(lines, version_info, start_pos=start_pos) + + +def _print_tokens(func): + """ + A small helper function to help debug the tokenize_lines function. + """ + def wrapper(*args, **kwargs): + for token in func(*args, **kwargs): + print(token) + yield token + + return wrapper + + +# @_print_tokens +def tokenize_lines(lines, version_info, start_pos=(1, 0)): + """ + A heavily modified Python standard library tokenizer. + + Additionally to the default information, yields also the prefix of each + token. This idea comes from lib2to3. The prefix contains all information + that is irrelevant for the parser like newlines in parentheses or comments. + """ + pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \ + fstring_pattern_map, always_break_tokens, = \ + _get_token_collection(version_info) + paren_level = 0 # count parentheses + indents = [0] + max = 0 + numchars = '0123456789' + contstr = '' + contline = None + # We start with a newline. This makes indent at the first position + # possible. It's not valid Python, but still better than an INDENT in the + # second line (and not in the first). This makes quite a few things in + # Jedi's fast parser possible. + new_line = True + prefix = '' # Should never be required, but here for safety + additional_prefix = '' + first = True + lnum = start_pos[0] - 1 + fstring_stack = [] + for line in lines: # loop over lines in stream + lnum += 1 + pos = 0 + max = len(line) + if first: + if line.startswith(BOM_UTF8_STRING): + additional_prefix = BOM_UTF8_STRING + line = line[1:] + max = len(line) + + # Fake that the part before was already parsed. + line = '^' * start_pos[1] + line + pos = start_pos[1] + max += start_pos[1] + + first = False + + if contstr: # continued string + endmatch = endprog.match(line) + if endmatch: + pos = endmatch.end(0) + yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix) + contstr = '' + contline = None + else: + contstr = contstr + line + contline = contline + line + continue + + while pos < max: + if fstring_stack: + string, pos = _find_fstring_string(fstring_stack, line, lnum, pos) + if string: + yield PythonToken( + FSTRING_STRING, string, + fstring_stack[-1].last_string_start_pos, + # Never has a prefix because it can start anywhere and + # include whitespace. 
+ prefix='' + ) + fstring_stack[-1].previous_lines = '' + continue + + if pos == max: + break + + rest = line[pos:] + fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True) + + if fstring_index is not None: + yield PythonToken( + FSTRING_END, + fstring_stack[fstring_index].quote, + (lnum, pos), + prefix=additional_prefix, + ) + additional_prefix = '' + del fstring_stack[fstring_index:] + pos += end + continue + + pseudomatch = pseudo_token.match(line, pos) + if not pseudomatch: # scan for tokens + if line.endswith('\n'): + new_line = True + match = whitespace.match(line, pos) + pos = match.end() + yield PythonToken( + ERRORTOKEN, line[pos:], (lnum, pos), + additional_prefix + match.group(0) + ) + additional_prefix = '' + break + + prefix = additional_prefix + pseudomatch.group(1) + additional_prefix = '' + start, pos = pseudomatch.span(2) + spos = (lnum, start) + token = pseudomatch.group(2) + if token == '': + assert prefix + additional_prefix = prefix + # This means that we have a line with whitespace/comments at + # the end, which just results in an endmarker. + break + initial = token[0] + + if new_line and initial not in '\r\n#': + new_line = False + if paren_level == 0 and not fstring_stack: + i = 0 + while line[i] == '\f': + i += 1 + # TODO don't we need to change spos as well? + start -= 1 + if start > indents[-1]: + yield PythonToken(INDENT, '', spos, '') + indents.append(start) + while start < indents[-1]: + if start > indents[-2]: + yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '') + break + yield PythonToken(DEDENT, '', spos, '') + indents.pop() + + if fstring_stack: + fstring_index, end = _check_fstring_ending(fstring_stack, token) + if fstring_index is not None: + if end != 0: + yield PythonToken(ERRORTOKEN, token[:end], spos, prefix) + prefix = '' + + yield PythonToken( + FSTRING_END, + fstring_stack[fstring_index].quote, + (lnum, spos[1] + 1), + prefix=prefix + ) + del fstring_stack[fstring_index:] + pos -= len(token) - end + continue + + if (initial in numchars or # ordinary number + (initial == '.' and token != '.' and token != '...')): + yield PythonToken(NUMBER, token, spos, prefix) + elif initial in '\r\n': + if any(not f.allow_multiline() for f in fstring_stack): + # Would use fstring_stack.clear, but that's not available + # in Python 2. + fstring_stack[:] = [] + + if not new_line and paren_level == 0 and not fstring_stack: + yield PythonToken(NEWLINE, token, spos, prefix) + else: + additional_prefix = prefix + token + new_line = True + elif initial == '#': # Comments + assert not token.endswith("\n") + additional_prefix = prefix + token + elif token in triple_quoted: + endprog = endpats[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + yield PythonToken(STRING, token, spos, prefix) + else: + contstr_start = (lnum, start) # multiple lines + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + contstr_start = lnum, start + endprog = (endpats.get(initial) or endpats.get(token[1]) + or endpats.get(token[2])) + contstr = line[start:] + contline = line + break + else: # ordinary string + yield PythonToken(STRING, token, spos, prefix) + elif token in fstring_pattern_map: # The start of an fstring. 
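# Illustrative sketch (not from the patched file): a hedged usage example for
# the tokenizer above; parse_version_string comes from parso.utils.
from parso.python.tokenize import tokenize
from parso.python.token import tok_name
from parso.utils import parse_version_string

code = "if x:\n    y = 1\n"
for typ, value, start_pos, prefix in tokenize(code, parse_version_string('3.6')):
    print(tok_name[typ], repr(value), start_pos, repr(prefix))
# Unlike the stdlib tokenizer, malformed indentation yields ERROR_DEDENT or
# ERRORTOKEN tokens instead of raising, which is what makes the parser's error
# recovery possible.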
+ fstring_stack.append(FStringNode(fstring_pattern_map[token])) + yield PythonToken(FSTRING_START, token, spos, prefix) + elif is_identifier(initial): # ordinary name + if token in always_break_tokens: + fstring_stack[:] = [] + paren_level = 0 + while True: + indent = indents.pop() + if indent > start: + yield PythonToken(DEDENT, '', spos, '') + else: + indents.append(indent) + break + yield PythonToken(NAME, token, spos, prefix) + elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt + additional_prefix += prefix + line[start:] + break + else: + if token in '([{': + if fstring_stack: + fstring_stack[-1].open_parentheses(token) + else: + paren_level += 1 + elif token in ')]}': + if fstring_stack: + fstring_stack[-1].close_parentheses(token) + else: + paren_level -= 1 + elif token == ':' and fstring_stack \ + and fstring_stack[-1].parentheses_count == 1: + fstring_stack[-1].format_spec_count += 1 + + try: + # This check is needed in any case to check if it's a valid + # operator or just some random unicode character. + typ = opmap[token] + except KeyError: + typ = ERRORTOKEN + yield PythonToken(typ, token, spos, prefix) + + if contstr: + yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix) + if contstr.endswith('\n'): + new_line = True + + end_pos = lnum, max + # As the last position we just take the maximally possible position. We + # remove -1 for the last new line. + for indent in indents[1:]: + yield PythonToken(DEDENT, '', end_pos, '') + yield PythonToken(ENDMARKER, '', end_pos, additional_prefix) + + +if __name__ == "__main__": + if len(sys.argv) >= 2: + path = sys.argv[1] + with open(path) as f: + code = f.read() + else: + code = sys.stdin.read() + + from parso.utils import python_bytes_to_unicode, parse_version_string + + if isinstance(code, bytes): + code = python_bytes_to_unicode(code) + + for token in tokenize(code, parse_version_string()): + print(token) diff --git a/pythonFiles/parso/python/tree.py b/pythonFiles/parso/python/tree.py new file mode 100644 index 000000000000..f6b4dd38fad0 --- /dev/null +++ b/pythonFiles/parso/python/tree.py @@ -0,0 +1,1194 @@ +""" +This is the syntax tree for Python syntaxes (2 & 3). The classes represent +syntax elements like functions and imports. + +All of the nodes can be traced back to the `Python grammar file +`_. If you want to know how +a tree is structured, just analyse that file (for each Python version it's a +bit different). + +There's a lot of logic here that makes it easier for Jedi (and other libraries) +to deal with a Python syntax tree. + +By using :py:meth:`parso.tree.NodeOrLeaf.get_code` on a module, you can get +back the 1-to-1 representation of the input given to the parser. This is +important if you want to refactor a parser tree. + +>>> from parso import parse +>>> parser = parse('import os') +>>> module = parser.get_root_node() +>>> module + + +Any subclasses of :class:`Scope`, including :class:`Module` has an attribute +:attr:`iter_imports `: + +>>> list(module.iter_imports()) +[] + +Changes to the Python Grammar +----------------------------- + +A few things have changed when looking at Python grammar files: + +- :class:`Param` does not exist in Python grammar files. It is essentially a + part of a ``parameters`` node. |parso| splits it up to make it easier to + analyse parameters. However this just makes it easier to deal with the syntax + tree, it doesn't actually change the valid syntax. 
+- A few nodes like `lambdef` and `lambdef_nocond` have been merged in the + syntax tree to make it easier to do deal with them. + +Parser Tree Classes +------------------- +""" + +import re + +from parso._compatibility import utf8_repr, unicode +from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \ + search_ancestor +from parso.python.prefix import split_prefix + +_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', + 'with_stmt', 'async_stmt', 'suite']) +_RETURN_STMT_CONTAINERS = set(['suite', 'simple_stmt']) | _FLOW_CONTAINERS +_FUNC_CONTAINERS = set(['suite', 'simple_stmt', 'decorated']) | _FLOW_CONTAINERS +_GET_DEFINITION_TYPES = set([ + 'expr_stmt', 'comp_for', 'with_stmt', 'for_stmt', 'import_name', + 'import_from', 'param' +]) +_IMPORTS = set(['import_name', 'import_from']) + + +class DocstringMixin(object): + __slots__ = () + + def get_doc_node(self): + """ + Returns the string leaf of a docstring. e.g. ``r'''foo'''``. + """ + if self.type == 'file_input': + node = self.children[0] + elif self.type in ('funcdef', 'classdef'): + node = self.children[self.children.index(':') + 1] + if node.type == 'suite': # Normally a suite + node = node.children[1] # -> NEWLINE stmt + else: # ExprStmt + simple_stmt = self.parent + c = simple_stmt.parent.children + index = c.index(simple_stmt) + if not index: + return None + node = c[index - 1] + + if node.type == 'simple_stmt': + node = node.children[0] + if node.type == 'string': + return node + return None + + +class PythonMixin(object): + """ + Some Python specific utitilies. + """ + __slots__ = () + + def get_name_of_position(self, position): + """ + Given a (line, column) tuple, returns a :py:class:`Name` or ``None`` if + there is no name at that position. + """ + for c in self.children: + if isinstance(c, Leaf): + if c.type == 'name' and c.start_pos <= position <= c.end_pos: + return c + else: + result = c.get_name_of_position(position) + if result is not None: + return result + return None + + +class PythonLeaf(PythonMixin, Leaf): + __slots__ = () + + def _split_prefix(self): + return split_prefix(self, self.get_start_pos_of_prefix()) + + def get_start_pos_of_prefix(self): + """ + Basically calls :py:meth:`parso.tree.NodeOrLeaf.get_start_pos_of_prefix`. + """ + # TODO it is really ugly that we have to override it. Maybe change + # indent error leafs somehow? No idea how, though. + previous_leaf = self.get_previous_leaf() + if previous_leaf is not None and previous_leaf.type == 'error_leaf' \ + and previous_leaf.original_type in ('indent', 'error_dedent'): + previous_leaf = previous_leaf.get_previous_leaf() + + if previous_leaf is None: + return self.line - self.prefix.count('\n'), 0 # It's the first leaf. + return previous_leaf.end_pos + + +class _LeafWithoutNewlines(PythonLeaf): + """ + Simply here to optimize performance. 
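# Illustrative sketch (not from the patched file): a hedged usage example for
# DocstringMixin.get_doc_node() defined above.
import parso

module = parso.parse('def f():\n    """Doc."""\n    return 1\n', version='3.6')
func = next(module.iter_funcdefs())
doc = func.get_doc_node()
print(doc.type, doc.value)        # string '"""Doc."""'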
+ """ + __slots__ = () + + @property + def end_pos(self): + return self.line, self.column + len(self.value) + + +# Python base classes +class PythonBaseNode(PythonMixin, BaseNode): + __slots__ = () + + +class PythonNode(PythonMixin, Node): + __slots__ = () + + +class PythonErrorNode(PythonMixin, ErrorNode): + __slots__ = () + + +class PythonErrorLeaf(ErrorLeaf, PythonLeaf): + __slots__ = () + + +class EndMarker(_LeafWithoutNewlines): + __slots__ = () + type = 'endmarker' + + @utf8_repr + def __repr__(self): + return "<%s: prefix=%s>" % (type(self).__name__, repr(self.prefix)) + + +class Newline(PythonLeaf): + """Contains NEWLINE and ENDMARKER tokens.""" + __slots__ = () + type = 'newline' + + @utf8_repr + def __repr__(self): + return "<%s: %s>" % (type(self).__name__, repr(self.value)) + + +class Name(_LeafWithoutNewlines): + """ + A string. Sometimes it is important to know if the string belongs to a name + or not. + """ + type = 'name' + __slots__ = () + + def __repr__(self): + return "<%s: %s@%s,%s>" % (type(self).__name__, self.value, + self.line, self.column) + + def is_definition(self): + """ + Returns True if the name is being defined. + """ + return self.get_definition() is not None + + def get_definition(self, import_name_always=False): + """ + Returns None if there's on definition for a name. + + :param import_name_alway: Specifies if an import name is always a + definition. Normally foo in `from foo import bar` is not a + definition. + """ + node = self.parent + type_ = node.type + if type_ in ('power', 'atom_expr'): + # In `self.x = 3` self is not a definition, but x is. + return None + + if type_ in ('funcdef', 'classdef'): + if self == node.name: + return node + return None + + if type_ == 'except_clause': + # TODO in Python 2 this doesn't work correctly. See grammar file. + # I think we'll just let it be. Python 2 will be gone in a few + # years. + if self.get_previous_sibling() == 'as': + return node.parent # The try_stmt. + return None + + while node is not None: + if node.type == 'suite': + return None + if node.type in _GET_DEFINITION_TYPES: + if self in node.get_defined_names(): + return node + if import_name_always and node.type in _IMPORTS: + return node + return None + node = node.parent + return None + + +class Literal(PythonLeaf): + __slots__ = () + + +class Number(Literal): + type = 'number' + __slots__ = () + + +class String(Literal): + type = 'string' + __slots__ = () + + @property + def string_prefix(self): + return re.match('\w*(?=[\'"])', self.value).group(0) + + def _get_payload(self): + match = re.search( + r'''('{3}|"{3}|'|")(.*)$''', + self.value, + flags=re.DOTALL + ) + return match.group(2)[:-len(match.group(1))] + + +class FStringString(Leaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. + """ + type = 'fstring_string' + __slots__ = () + + +class FStringStart(Leaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. + """ + type = 'fstring_start' + __slots__ = () + + +class FStringEnd(Leaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. + """ + type = 'fstring_end' + __slots__ = () + + +class _StringComparisonMixin(object): + def __eq__(self, other): + """ + Make comparisons with strings easy. + Improves the readability of the parser. 
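# Illustrative sketch (not from the patched file): what the string-comparison
# mixin above buys in practice.
import parso

funcdef = parso.parse('def f(): pass', version='3.6').children[0]
def_kw = funcdef.get_first_leaf()
assert def_kw.type == 'keyword'
assert def_kw == 'def'            # Keyword/Operator leaves compare equal to plain strings
assert def_kw != 'class'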
+ """ + if isinstance(other, (str, unicode)): + return self.value == other + + return self is other + + def __ne__(self, other): + """Python 2 compatibility.""" + return not self.__eq__(other) + + def __hash__(self): + return hash(self.value) + + +class Operator(_LeafWithoutNewlines, _StringComparisonMixin): + type = 'operator' + __slots__ = () + + +class Keyword(_LeafWithoutNewlines, _StringComparisonMixin): + type = 'keyword' + __slots__ = () + + +class Scope(PythonBaseNode, DocstringMixin): + """ + Super class for the parser tree, which represents the state of a python + text file. + A Scope is either a function, class or lambda. + """ + __slots__ = () + + def __init__(self, children): + super(Scope, self).__init__(children) + + def iter_funcdefs(self): + """ + Returns a generator of `funcdef` nodes. + """ + return self._search_in_scope('funcdef') + + def iter_classdefs(self): + """ + Returns a generator of `classdef` nodes. + """ + return self._search_in_scope('classdef') + + def iter_imports(self): + """ + Returns a generator of `import_name` and `import_from` nodes. + """ + return self._search_in_scope('import_name', 'import_from') + + def _search_in_scope(self, *names): + def scan(children): + for element in children: + if element.type in names: + yield element + if element.type in _FUNC_CONTAINERS: + for e in scan(element.children): + yield e + + return scan(self.children) + + def get_suite(self): + """ + Returns the part that is executed by the function. + """ + return self.children[-1] + + def __repr__(self): + try: + name = self.name.value + except AttributeError: + name = '' + + return "<%s: %s@%s-%s>" % (type(self).__name__, name, + self.start_pos[0], self.end_pos[0]) + + +class Module(Scope): + """ + The top scope, which is always a module. + Depending on the underlying parser this may be a full module or just a part + of a module. + """ + __slots__ = ('_used_names',) + type = 'file_input' + + def __init__(self, children): + super(Module, self).__init__(children) + self._used_names = None + + def _iter_future_import_names(self): + """ + :return: A list of future import names. + :rtype: list of str + """ + # In Python it's not allowed to use future imports after the first + # actual (non-future) statement. However this is not a linter here, + # just return all future imports. If people want to scan for issues + # they should use the API. + for imp in self.iter_imports(): + if imp.type == 'import_from' and imp.level == 0: + for path in imp.get_paths(): + names = [name.value for name in path] + if len(names) == 2 and names[0] == '__future__': + yield names[1] + + def _has_explicit_absolute_import(self): + """ + Checks if imports in this module are explicitly absolute, i.e. there + is a ``__future__`` import. + Currently not public, might be in the future. + :return bool: + """ + for name in self._iter_future_import_names(): + if name == 'absolute_import': + return True + return False + + def get_used_names(self): + """ + Returns all the :class:`Name` leafs that exist in this module. This + includes both definitions and references of names. + """ + if self._used_names is None: + # Don't directly use self._used_names to eliminate a lookup. 
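# Illustrative sketch (not from the patched file): a hedged usage example for
# Module.get_used_names() above, combined with Name.is_definition().
import parso

module = parso.parse('x = y + 1\n', version='3.6')
used = module.get_used_names()
x_name, = used['x']
y_name, = used['y']
assert x_name.is_definition()       # left-hand side of the assignment
assert not y_name.is_definition()   # only a reference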
+ dct = {} + + def recurse(node): + try: + children = node.children + except AttributeError: + if node.type == 'name': + arr = dct.setdefault(node.value, []) + arr.append(node) + else: + for child in children: + recurse(child) + + recurse(self) + self._used_names = dct + return self._used_names + + +class Decorator(PythonBaseNode): + type = 'decorator' + __slots__ = () + + +class ClassOrFunc(Scope): + __slots__ = () + + @property + def name(self): + """ + Returns the `Name` leaf that defines the function or class name. + """ + return self.children[1] + + def get_decorators(self): + """ + :rtype: list of :class:`Decorator` + """ + decorated = self.parent + if decorated.type == 'decorated': + if decorated.children[0].type == 'decorators': + return decorated.children[0].children + else: + return decorated.children[:1] + else: + return [] + + +class Class(ClassOrFunc): + """ + Used to store the parsed contents of a python class. + """ + type = 'classdef' + __slots__ = () + + def __init__(self, children): + super(Class, self).__init__(children) + + def get_super_arglist(self): + """ + Returns the `arglist` node that defines the super classes. It returns + None if there are no arguments. + """ + if self.children[2] != '(': # Has no parentheses + return None + else: + if self.children[3] == ')': # Empty parentheses + return None + else: + return self.children[3] + + +def _create_params(parent, argslist_list): + """ + `argslist_list` is a list that can contain an argslist as a first item, but + most not. It's basically the items between the parameter brackets (which is + at most one item). + This function modifies the parser structure. It generates `Param` objects + from the normal ast. Those param objects do not exist in a normal ast, but + make the evaluation of the ast tree so much easier. + You could also say that this function replaces the argslist node with a + list of Param objects. + """ + def check_python2_nested_param(node): + """ + Python 2 allows params to look like ``def x(a, (b, c))``, which is + basically a way of unpacking tuples in params. Python 3 has ditched + this behavior. Jedi currently just ignores those constructs. + """ + return node.type == 'fpdef' and node.children[0] == '(' + + try: + first = argslist_list[0] + except IndexError: + return [] + + if first.type in ('name', 'fpdef'): + if check_python2_nested_param(first): + return [first] + else: + return [Param([first], parent)] + elif first == '*': + return [first] + else: # argslist is a `typedargslist` or a `varargslist`. + if first.type == 'tfpdef': + children = [first] + else: + children = first.children + new_children = [] + start = 0 + # Start with offset 1, because the end is higher. + for end, child in enumerate(children + [None], 1): + if child is None or child == ',': + param_children = children[start:end] + if param_children: # Could as well be comma and then end. + if param_children[0] == '*' and param_children[1] == ',' \ + or check_python2_nested_param(param_children[0]): + for p in param_children: + p.parent = parent + new_children += param_children + else: + new_children.append(Param(param_children, parent)) + start = end + return new_children + + +class Function(ClassOrFunc): + """ + Used to store the parsed contents of a python function. + + Children:: + + 0. + 1. + 2. parameter list (including open-paren and close-paren s) + 3. or 5. + 4. or 6. Node() representing function body + 3. -> (if annotation is also present) + 4. 
annotation (if present) + """ + type = 'funcdef' + + def __init__(self, children): + super(Function, self).__init__(children) + parameters = self.children[2] # After `def foo` + parameters.children[1:-1] = _create_params(parameters, parameters.children[1:-1]) + + def _get_param_nodes(self): + return self.children[2].children + + def get_params(self): + """ + Returns a list of `Param()`. + """ + return [p for p in self._get_param_nodes() if p.type == 'param'] + + @property + def name(self): + return self.children[1] # First token after `def` + + def iter_yield_exprs(self): + """ + Returns a generator of `yield_expr`. + """ + def scan(children): + for element in children: + if element.type in ('classdef', 'funcdef', 'lambdef'): + continue + + try: + nested_children = element.children + except AttributeError: + if element.value == 'yield': + if element.parent.type == 'yield_expr': + yield element.parent + else: + yield element + else: + for result in scan(nested_children): + yield result + + return scan(self.children) + + def iter_return_stmts(self): + """ + Returns a generator of `return_stmt`. + """ + def scan(children): + for element in children: + if element.type == 'return_stmt' \ + or element.type == 'keyword' and element.value == 'return': + yield element + if element.type in _RETURN_STMT_CONTAINERS: + for e in scan(element.children): + yield e + + return scan(self.children) + + def iter_raise_stmts(self): + """ + Returns a generator of `raise_stmt`. Includes raise statements inside try-except blocks + """ + def scan(children): + for element in children: + if element.type == 'raise_stmt' \ + or element.type == 'keyword' and element.value == 'raise': + yield element + if element.type in _RETURN_STMT_CONTAINERS: + for e in scan(element.children): + yield e + + return scan(self.children) + + def is_generator(self): + """ + :return bool: Checks if a function is a generator or not. + """ + return next(self.iter_yield_exprs(), None) is not None + + @property + def annotation(self): + """ + Returns the test node after `->` or `None` if there is no annotation. + """ + try: + if self.children[3] == "->": + return self.children[4] + assert self.children[3] == ":" + return None + except IndexError: + return None + + +class Lambda(Function): + """ + Lambdas are basically trimmed functions, so give it the same interface. + + Children:: + + 0. + *. for each argument x + -2. + -1. Node() representing body + """ + type = 'lambdef' + __slots__ = () + + def __init__(self, children): + # We don't want to call the Function constructor, call its parent. + super(Function, self).__init__(children) + # Everything between `lambda` and the `:` operator is a parameter. + self.children[1:-2] = _create_params(self, self.children[1:-2]) + + @property + def name(self): + """ + Raises an AttributeError. Lambdas don't have a defined name. + """ + raise AttributeError("lambda is not named.") + + def _get_param_nodes(self): + return self.children[1:-2] + + @property + def annotation(self): + """ + Returns `None`, lambdas don't have annotations. + """ + return None + + def __repr__(self): + return "<%s@%s>" % (self.__class__.__name__, self.start_pos) + + +class Flow(PythonBaseNode): + __slots__ = () + + +class IfStmt(Flow): + type = 'if_stmt' + __slots__ = () + + def get_test_nodes(self): + """ + E.g. 
returns all the `test` nodes that are named as x, below: + + if x: + pass + elif x: + pass + """ + for i, c in enumerate(self.children): + if c in ('elif', 'if'): + yield self.children[i + 1] + + def get_corresponding_test_node(self, node): + """ + Searches for the branch in which the node is and returns the + corresponding test node (see function above). However if the node is in + the test node itself and not in the suite return None. + """ + start_pos = node.start_pos + for check_node in reversed(list(self.get_test_nodes())): + if check_node.start_pos < start_pos: + if start_pos < check_node.end_pos: + return None + # In this case the node is within the check_node itself, + # not in the suite + else: + return check_node + + def is_node_after_else(self, node): + """ + Checks if a node is defined after `else`. + """ + for c in self.children: + if c == 'else': + if node.start_pos > c.start_pos: + return True + else: + return False + + +class WhileStmt(Flow): + type = 'while_stmt' + __slots__ = () + + +class ForStmt(Flow): + type = 'for_stmt' + __slots__ = () + + def get_testlist(self): + """ + Returns the input node ``y`` from: ``for x in y:``. + """ + return self.children[3] + + def get_defined_names(self): + return _defined_names(self.children[1]) + + +class TryStmt(Flow): + type = 'try_stmt' + __slots__ = () + + def get_except_clause_tests(self): + """ + Returns the ``test`` nodes found in ``except_clause`` nodes. + Returns ``[None]`` for except clauses without an exception given. + """ + for node in self.children: + if node.type == 'except_clause': + yield node.children[1] + elif node == 'except': + yield None + + +class WithStmt(Flow): + type = 'with_stmt' + __slots__ = () + + def get_defined_names(self): + """ + Returns the a list of `Name` that the with statement defines. The + defined names are set after `as`. + """ + names = [] + for with_item in self.children[1:-2:2]: + # Check with items for 'as' names. + if with_item.type == 'with_item': + names += _defined_names(with_item.children[2]) + return names + + def get_test_node_from_name(self, name): + node = name.parent + if node.type != 'with_item': + raise ValueError('The name is not actually part of a with statement.') + return node.children[0] + + +class Import(PythonBaseNode): + __slots__ = () + + def get_path_for_name(self, name): + """ + The path is the list of names that leads to the searched name. + + :return list of Name: + """ + try: + # The name may be an alias. If it is, just map it back to the name. + name = self._aliases()[name] + except KeyError: + pass + + for path in self.get_paths(): + if name in path: + return path[:path.index(name) + 1] + raise ValueError('Name should be defined in the import itself') + + def is_nested(self): + return False # By default, sub classes may overwrite this behavior + + def is_star_import(self): + return self.children[-1] == '*' + + +class ImportFrom(Import): + type = 'import_from' + __slots__ = () + + def get_defined_names(self): + """ + Returns the a list of `Name` that the import defines. The + defined names are set after `import` or in case an alias - `as` - is + present that name is returned. 
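+ For example, ``from foo import bar as baz`` defines ``baz``, while
+ ``from foo import bar`` defines ``bar`` itself.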
+ """ + return [alias or name for name, alias in self._as_name_tuples()] + + def _aliases(self): + """Mapping from alias to its corresponding name.""" + return dict((alias, name) for name, alias in self._as_name_tuples() + if alias is not None) + + def get_from_names(self): + for n in self.children[1:]: + if n not in ('.', '...'): + break + if n.type == 'dotted_name': # from x.y import + return n.children[::2] + elif n == 'import': # from . import + return [] + else: # from x import + return [n] + + @property + def level(self): + """The level parameter of ``__import__``.""" + level = 0 + for n in self.children[1:]: + if n in ('.', '...'): + level += len(n.value) + else: + break + return level + + def _as_name_tuples(self): + last = self.children[-1] + if last == ')': + last = self.children[-2] + elif last == '*': + return # No names defined directly. + + if last.type == 'import_as_names': + as_names = last.children[::2] + else: + as_names = [last] + for as_name in as_names: + if as_name.type == 'name': + yield as_name, None + else: + yield as_name.children[::2] # yields x, y -> ``x as y`` + + def get_paths(self): + """ + The import paths defined in an import statement. Typically an array + like this: ``[, ]``. + + :return list of list of Name: + """ + dotted = self.get_from_names() + + if self.children[-1] == '*': + return [dotted] + return [dotted + [name] for name, alias in self._as_name_tuples()] + + +class ImportName(Import): + """For ``import_name`` nodes. Covers normal imports without ``from``.""" + type = 'import_name' + __slots__ = () + + def get_defined_names(self): + """ + Returns the a list of `Name` that the import defines. The defined names + is always the first name after `import` or in case an alias - `as` - is + present that name is returned. + """ + return [alias or path[0] for path, alias in self._dotted_as_names()] + + @property + def level(self): + """The level parameter of ``__import__``.""" + return 0 # Obviously 0 for imports without from. + + def get_paths(self): + return [path for path, alias in self._dotted_as_names()] + + def _dotted_as_names(self): + """Generator of (list(path), alias) where alias may be None.""" + dotted_as_names = self.children[1] + if dotted_as_names.type == 'dotted_as_names': + as_names = dotted_as_names.children[::2] + else: + as_names = [dotted_as_names] + + for as_name in as_names: + if as_name.type == 'dotted_as_name': + alias = as_name.children[2] + as_name = as_name.children[0] + else: + alias = None + if as_name.type == 'name': + yield [as_name], alias + else: + # dotted_names + yield as_name.children[::2], alias + + def is_nested(self): + """ + This checks for the special case of nested imports, without aliases and + from statement:: + + import foo.bar + """ + return bool([1 for path, alias in self._dotted_as_names() + if alias is None and len(path) > 1]) + + def _aliases(self): + """ + :return list of Name: Returns all the alias + """ + return dict((alias, path[-1]) for path, alias in self._dotted_as_names() + if alias is not None) + + +class KeywordStatement(PythonBaseNode): + """ + For the following statements: `assert`, `del`, `global`, `nonlocal`, + `raise`, `return`, `yield`, `return`, `yield`. + + `pass`, `continue` and `break` are not in there, because they are just + simple keywords and the parser reduces it to a keyword. + """ + __slots__ = () + + @property + def type(self): + """ + Keyword statements start with the keyword and end with `_stmt`. You can + crosscheck this with the Python grammar. 
+ """ + return '%s_stmt' % self.keyword + + @property + def keyword(self): + return self.children[0].value + + +class AssertStmt(KeywordStatement): + __slots__ = () + + @property + def assertion(self): + return self.children[1] + + +class GlobalStmt(KeywordStatement): + __slots__ = () + + def get_global_names(self): + return self.children[1::2] + + +class ReturnStmt(KeywordStatement): + __slots__ = () + + +class YieldExpr(PythonBaseNode): + type = 'yield_expr' + __slots__ = () + + +def _defined_names(current): + """ + A helper function to find the defined names in statements, for loops and + list comprehensions. + """ + names = [] + if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist', 'testlist'): + for child in current.children[::2]: + names += _defined_names(child) + elif current.type in ('atom', 'star_expr'): + names += _defined_names(current.children[1]) + elif current.type in ('power', 'atom_expr'): + if current.children[-2] != '**': # Just if there's no operation + trailer = current.children[-1] + if trailer.children[0] == '.': + names.append(trailer.children[1]) + else: + names.append(current) + return names + + +class ExprStmt(PythonBaseNode, DocstringMixin): + type = 'expr_stmt' + __slots__ = () + + def get_defined_names(self): + """ + Returns a list of `Name` defined before the `=` sign. + """ + names = [] + if self.children[1].type == 'annassign': + names = _defined_names(self.children[0]) + return [ + name + for i in range(0, len(self.children) - 2, 2) + if '=' in self.children[i + 1].value + for name in _defined_names(self.children[i]) + ] + names + + def get_rhs(self): + """Returns the right-hand-side of the equals.""" + return self.children[-1] + + def yield_operators(self): + """ + Returns a generator of `+=`, `=`, etc. or None if there is no operation. + """ + first = self.children[1] + if first.type == 'annassign': + if len(first.children) <= 2: + return # No operator is available, it's just PEP 484. + + first = first.children[2] + yield first + + for operator in self.children[3::2]: + yield operator + + +class Param(PythonBaseNode): + """ + It's a helper class that makes business logic with params much easier. The + Python grammar defines no ``param`` node. It defines it in a different way + that is not really suited to working with parameters. + """ + type = 'param' + + def __init__(self, children, parent): + super(Param, self).__init__(children) + self.parent = parent + for child in children: + child.parent = self + + @property + def star_count(self): + """ + Is `0` in case of `foo`, `1` in case of `*foo` or `2` in case of + `**foo`. + """ + first = self.children[0] + if first in ('*', '**'): + return len(first.value) + return 0 + + @property + def default(self): + """ + The default is the test node that appears after the `=`. Is `None` in + case no default is present. + """ + has_comma = self.children[-1] == ',' + try: + if self.children[-2 - int(has_comma)] == '=': + return self.children[-1 - int(has_comma)] + except IndexError: + return None + + @property + def annotation(self): + """ + The default is the test node that appears after `:`. Is `None` in case + no annotation is present. + """ + tfpdef = self._tfpdef() + if tfpdef.type == 'tfpdef': + assert tfpdef.children[1] == ":" + assert len(tfpdef.children) == 3 + annotation = tfpdef.children[2] + return annotation + else: + return None + + def _tfpdef(self): + """ + tfpdef: see e.g. grammar36.txt. 
+ """ + offset = int(self.children[0] in ('*', '**')) + return self.children[offset] + + @property + def name(self): + """ + The `Name` leaf of the param. + """ + if self._tfpdef().type == 'tfpdef': + return self._tfpdef().children[0] + else: + return self._tfpdef() + + def get_defined_names(self): + return [self.name] + + @property + def position_index(self): + """ + Property for the positional index of a paramter. + """ + index = self.parent.children.index(self) + try: + keyword_only_index = self.parent.children.index('*') + if index > keyword_only_index: + # Skip the ` *, ` + index -= 2 + except ValueError: + pass + return index - 1 + + def get_parent_function(self): + """ + Returns the function/lambda of a parameter. + """ + return search_ancestor(self, 'funcdef', 'lambdef') + + def get_code(self, include_prefix=True, include_comma=True): + """ + Like all the other get_code functions, but includes the param + `include_comma`. + + :param include_comma bool: If enabled includes the comma in the string output. + """ + if include_comma: + return super(Param, self).get_code(include_prefix) + + children = self.children + if children[-1] == ',': + children = children[:-1] + return self._get_code_for_children( + children, + include_prefix=include_prefix + ) + + def __repr__(self): + default = '' if self.default is None else '=%s' % self.default.get_code() + return '<%s: %s>' % (type(self).__name__, str(self._tfpdef()) + default) + + +class CompFor(PythonBaseNode): + type = 'comp_for' + __slots__ = () + + def get_defined_names(self): + """ + Returns the a list of `Name` that the comprehension defines. + """ + # allow async for + return _defined_names(self.children[self.children.index('for') + 1]) diff --git a/pythonFiles/parso/tree.py b/pythonFiles/parso/tree.py new file mode 100644 index 000000000000..5316795be57c --- /dev/null +++ b/pythonFiles/parso/tree.py @@ -0,0 +1,363 @@ +from abc import abstractmethod, abstractproperty +from parso._compatibility import utf8_repr, encoding, py_version + + +def search_ancestor(node, *node_types): + """ + Recursively looks at the parents of a node and returns the first found node + that matches node_types. Returns ``None`` if no matching node is found. + + :param node: The ancestors of this node will be checked. + :param node_types: type names that are searched for. + :type node_types: tuple of str + """ + while True: + node = node.parent + if node is None or node.type in node_types: + return node + + +class NodeOrLeaf(object): + """ + The base class for nodes and leaves. + """ + __slots__ = () + type = None + ''' + The type is a string that typically matches the types of the grammar file. + ''' + + def get_root_node(self): + """ + Returns the root node of a parser tree. The returned node doesn't have + a parent node like all the other nodes/leaves. + """ + scope = self + while scope.parent is not None: + scope = scope.parent + return scope + + def get_next_sibling(self): + """ + Returns the node immediately following this node in this parent's + children list. If this node does not have a next sibling, it is None + """ + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + try: + return self.parent.children[i + 1] + except IndexError: + return None + + def get_previous_sibling(self): + """ + Returns the node immediately preceding this node in this parent's + children list. If this node does not have a previous sibling, it is + None. 
+ """ + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + if i == 0: + return None + return self.parent.children[i - 1] + + def get_previous_leaf(self): + """ + Returns the previous leaf in the parser tree. + Returns `None` if this is the first element in the parser tree. + """ + node = self + while True: + c = node.parent.children + i = c.index(node) + if i == 0: + node = node.parent + if node.parent is None: + return None + else: + node = c[i - 1] + break + + while True: + try: + node = node.children[-1] + except AttributeError: # A Leaf doesn't have children. + return node + + def get_next_leaf(self): + """ + Returns the next leaf in the parser tree. + Returns None if this is the last element in the parser tree. + """ + node = self + while True: + c = node.parent.children + i = c.index(node) + if i == len(c) - 1: + node = node.parent + if node.parent is None: + return None + else: + node = c[i + 1] + break + + while True: + try: + node = node.children[0] + except AttributeError: # A Leaf doesn't have children. + return node + + @abstractproperty + def start_pos(self): + """ + Returns the starting position of the prefix as a tuple, e.g. `(3, 4)`. + + :return tuple of int: (line, column) + """ + + @abstractproperty + def end_pos(self): + """ + Returns the end position of the prefix as a tuple, e.g. `(3, 4)`. + + :return tuple of int: (line, column) + """ + + @abstractmethod + def get_start_pos_of_prefix(self): + """ + Returns the start_pos of the prefix. This means basically it returns + the end_pos of the last prefix. The `get_start_pos_of_prefix()` of the + prefix `+` in `2 + 1` would be `(1, 1)`, while the start_pos is + `(1, 2)`. + + :return tuple of int: (line, column) + """ + + @abstractmethod + def get_first_leaf(self): + """ + Returns the first leaf of a node or itself if this is a leaf. + """ + + @abstractmethod + def get_last_leaf(self): + """ + Returns the last leaf of a node or itself if this is a leaf. + """ + + @abstractmethod + def get_code(self, include_prefix=True): + """ + Returns the code that was input the input for the parser for this node. + + :param include_prefix: Removes the prefix (whitespace and comments) of + e.g. a statement. + """ + + +class Leaf(NodeOrLeaf): + ''' + Leafs are basically tokens with a better API. Leafs exactly know where they + were defined and what text preceeds them. + ''' + __slots__ = ('value', 'parent', 'line', 'column', 'prefix') + + def __init__(self, value, start_pos, prefix=''): + self.value = value + ''' + :py:func:`str` The value of the current token. + ''' + self.start_pos = start_pos + self.prefix = prefix + ''' + :py:func:`str` Typically a mixture of whitespace and comments. Stuff + that is syntactically irrelevant for the syntax tree. + ''' + self.parent = None + ''' + The parent :class:`BaseNode` of this leaf. + ''' + + @property + def start_pos(self): + return self.line, self.column + + @start_pos.setter + def start_pos(self, value): + self.line = value[0] + self.column = value[1] + + def get_start_pos_of_prefix(self): + previous_leaf = self.get_previous_leaf() + if previous_leaf is None: + return self.line - self.prefix.count('\n'), 0 # It's the first leaf. 
+ return previous_leaf.end_pos + + def get_first_leaf(self): + return self + + def get_last_leaf(self): + return self + + def get_code(self, include_prefix=True): + if include_prefix: + return self.prefix + self.value + else: + return self.value + + @property + def end_pos(self): + lines = self.value.split('\n') + end_pos_line = self.line + len(lines) - 1 + # Check for multiline token + if self.line == end_pos_line: + end_pos_column = self.column + len(lines[-1]) + else: + end_pos_column = len(lines[-1]) + return end_pos_line, end_pos_column + + @utf8_repr + def __repr__(self): + value = self.value + if not value: + value = self.type + return "<%s: %s>" % (type(self).__name__, value) + + +class TypedLeaf(Leaf): + __slots__ = ('type',) + def __init__(self, type, value, start_pos, prefix=''): + super(TypedLeaf, self).__init__(value, start_pos, prefix) + self.type = type + + +class BaseNode(NodeOrLeaf): + """ + The super class for all nodes. + A node has children, a type and possibly a parent node. + """ + __slots__ = ('children', 'parent') + type = None + + def __init__(self, children): + for c in children: + c.parent = self + self.children = children + """ + A list of :class:`NodeOrLeaf` child nodes. + """ + self.parent = None + ''' + The parent :class:`BaseNode` of this leaf. + None if this is the root node. + ''' + + @property + def start_pos(self): + return self.children[0].start_pos + + def get_start_pos_of_prefix(self): + return self.children[0].get_start_pos_of_prefix() + + @property + def end_pos(self): + return self.children[-1].end_pos + + def _get_code_for_children(self, children, include_prefix): + if include_prefix: + return "".join(c.get_code() for c in children) + else: + first = children[0].get_code(include_prefix=False) + return first + "".join(c.get_code() for c in children[1:]) + + def get_code(self, include_prefix=True): + return self._get_code_for_children(self.children, include_prefix) + + def get_leaf_for_position(self, position, include_prefixes=False): + """ + Get the :py:class:`parso.tree.Leaf` at ``position`` + + :param tuple position: A position tuple, row, column. Rows start from 1 + :param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls + on whitespace or comments before a leaf + :return: :py:class:`parso.tree.Leaf` at ``position``, or ``None`` + """ + def binary_search(lower, upper): + if lower == upper: + element = self.children[lower] + if not include_prefixes and position < element.start_pos: + # We're on a prefix. 
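+ # The position falls inside this element's prefix (whitespace or
+ # comments), and prefixes were explicitly excluded.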
+ return None + # In case we have prefixes, a leaf always matches + try: + return element.get_leaf_for_position(position, include_prefixes) + except AttributeError: + return element + + + index = int((lower + upper) / 2) + element = self.children[index] + if position <= element.end_pos: + return binary_search(lower, index) + else: + return binary_search(index + 1, upper) + + if not ((1, 0) <= position <= self.children[-1].end_pos): + raise ValueError('Please provide a position that exists within this node.') + return binary_search(0, len(self.children) - 1) + + def get_first_leaf(self): + return self.children[0].get_first_leaf() + + def get_last_leaf(self): + return self.children[-1].get_last_leaf() + + @utf8_repr + def __repr__(self): + code = self.get_code().replace('\n', ' ').strip() + if not py_version >= 30: + code = code.encode(encoding, 'replace') + return "<%s: %s@%s,%s>" % \ + (type(self).__name__, code, self.start_pos[0], self.start_pos[1]) + + +class Node(BaseNode): + """Concrete implementation for interior nodes.""" + __slots__ = ('type',) + + def __init__(self, type, children): + super(Node, self).__init__(children) + self.type = type + + def __repr__(self): + return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children) + + +class ErrorNode(BaseNode): + """ + A node that contains valid nodes/leaves that we're follow by a token that + was invalid. This basically means that the leaf after this node is where + Python would mark a syntax error. + """ + __slots__ = () + type = 'error_node' + + +class ErrorLeaf(Leaf): + """ + A leaf that is either completely invalid in a language (like `$` in Python) + or is invalid at that position. Like the star in `1 +* 1`. + """ + __slots__ = ('original_type',) + type = 'error_leaf' + + def __init__(self, original_type, value, start_pos, prefix=''): + super(ErrorLeaf, self).__init__(value, start_pos, prefix) + self.original_type = original_type + + def __repr__(self): + return "<%s: %s:%s, %s>" % \ + (type(self).__name__, self.original_type, repr(self.value), self.start_pos) diff --git a/pythonFiles/parso/utils.py b/pythonFiles/parso/utils.py new file mode 100644 index 000000000000..a4801b996616 --- /dev/null +++ b/pythonFiles/parso/utils.py @@ -0,0 +1,156 @@ +from collections import namedtuple +import re +import sys +from ast import literal_eval + +from parso._compatibility import unicode, total_ordering + + +Version = namedtuple('Version', 'major, minor, micro') + + +def split_lines(string, keepends=False): + r""" + Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`, + looks at form feeds and other special characters as normal text. Just + splits ``\n`` and ``\r\n``. + Also different: Returns ``[""]`` for an empty string input. + + In Python 2.7 form feeds are used as normal characters when using + str.splitlines. However in Python 3 somewhere there was a decision to split + also on form feeds. + """ + if keepends: + lst = string.splitlines(True) + + # We have to merge lines that were broken by form feed characters. + merge = [] + for i, line in enumerate(lst): + if line.endswith('\f'): + merge.append(i) + + for index in reversed(merge): + try: + lst[index] = lst[index] + lst[index + 1] + del lst[index + 1] + except IndexError: + # index + 1 can be empty and therefore there's no need to + # merge. + pass + + # The stdlib's implementation of the end is inconsistent when calling + # it with/without keepends. One time there's an empty string in the + # end, one time there's none. 
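+ # Normalise that here: a trailing newline (or empty input) always yields a
+ # final '' entry, e.g. split_lines('a\n', keepends=True) == ['a\n', ''].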
+ if string.endswith('\n') or string == '': + lst.append('') + return lst + else: + return re.split('\n|\r\n', string) + + +def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'): + """ + Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a + unicode object like in :py:meth:`bytes.decode`. + + :param encoding: See :py:meth:`bytes.decode` documentation. + :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be + ``'strict'``, ``'replace'`` or ``'ignore'``. + """ + def detect_encoding(): + """ + For the implementation of encoding definitions in Python, look at: + - http://www.python.org/dev/peps/pep-0263/ + - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations + """ + byte_mark = literal_eval(r"b'\xef\xbb\xbf'") + if source.startswith(byte_mark): + # UTF-8 byte-order mark + return 'utf-8' + + first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0) + possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)", + first_two_lines) + if possible_encoding: + return possible_encoding.group(1) + else: + # the default if nothing else has been set -> PEP 263 + return encoding + + if isinstance(source, unicode): + # only cast str/bytes + return source + + encoding = detect_encoding() + if not isinstance(encoding, unicode): + encoding = unicode(encoding, 'utf-8', 'replace') + + # Cast to unicode + return unicode(source, encoding, errors) + + +def version_info(): + """ + Returns a namedtuple of parso's version, similar to Python's + ``sys.version_info``. + """ + from parso import __version__ + tupl = re.findall(r'[a-z]+|\d+', __version__) + return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)]) + + +def _parse_version(version): + match = re.match(r'(\d+)(?:\.(\d)(?:\.\d+)?)?$', version) + if match is None: + raise ValueError('The given version is not in the right format. ' + 'Use something like "3.2" or "3".') + + major = int(match.group(1)) + minor = match.group(2) + if minor is None: + # Use the latest Python in case it's not exactly defined, because the + # grammars are typically backwards compatible? + if major == 2: + minor = "7" + elif major == 3: + minor = "6" + else: + raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.") + minor = int(minor) + return PythonVersionInfo(major, minor) + + +@total_ordering +class PythonVersionInfo(namedtuple('Version', 'major, minor')): + def __gt__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) > other + super(PythonVersionInfo, self).__gt__(other) + + return (self.major, self.minor) + + def __eq__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) == other + super(PythonVersionInfo, self).__eq__(other) + + def __ne__(self, other): + return not self.__eq__(other) + + +def parse_version_string(version=None): + """ + Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and + returns a corresponding version info that is always two characters long in + decimal. 
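+ For example, ``'3'``, ``'3.6'`` and ``'3.6.1'`` all normalise to a
+ ``PythonVersionInfo`` with ``major=3`` and ``minor=6`` (the micro part is
+ dropped).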
+ """ + if version is None: + version = '%s.%s' % sys.version_info[:2] + if not isinstance(version, (unicode, str)): + raise TypeError("version must be a string like 3.2.") + + return _parse_version(version) From 41a00b68a611793053c214ca4685705ec01297fa Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Fri, 1 Jun 2018 21:41:18 -0700 Subject: [PATCH 3/4] Update `parso` package to 0.2.1 --- news/3 Code Health/1833.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/3 Code Health/1833.md diff --git a/news/3 Code Health/1833.md b/news/3 Code Health/1833.md new file mode 100644 index 000000000000..e9a49948e14a --- /dev/null +++ b/news/3 Code Health/1833.md @@ -0,0 +1 @@ +Update `parso` package to 0.2.1. From ba611d4bd0008d33e13cecc28a50601acb7560f0 Mon Sep 17 00:00:00 2001 From: Don Jayamanne Date: Fri, 1 Jun 2018 21:44:34 -0700 Subject: [PATCH 4/4] Fix for intellisense failing when using the new `Outline` feature --- news/2 Fixes/1721.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/2 Fixes/1721.md diff --git a/news/2 Fixes/1721.md b/news/2 Fixes/1721.md new file mode 100644 index 000000000000..8b733365272c --- /dev/null +++ b/news/2 Fixes/1721.md @@ -0,0 +1 @@ +Fix for intellisense failing when using the new `Outline` feature.