By Alejandro Santos, alejolp@gmail.com
This project is a Python 3 tokenizer, parser generator, and parser for the Python 3 grammar file. No specific parsing library is used, and no regular expressions are used either. This is all pure Python 3 code.
For the moment, only the AST is generated, translated directly from the grammar description.
The parser generator is a Python code generator that outputs a Recursive Descent Parser.
Example output:
$ ./main.py parse test.py
Encoding: utf-8
(1, 0, 5, 1, 'print')
(7, 5, 5, 1, '(')
(2, 6, 7, 1, '2')
(14, 7, 7, 1, '+')
(2, 8, 9, 1, '2')
(8, 9, 9, 1, ')')
(4, 10, 9, 1, '\n')
(1, 11, 12, 2, 'W')
(22, 13, 13, 2, '=')
(9, 15, 15, 2, '[')
(2, 16, 17, 2, '2')
(1, 18, 21, 2, 'for')
(1, 22, 23, 2, 'x')
(1, 24, 26, 2, 'in')
(9, 27, 27, 2, '[')
(10, 28, 27, 2, ']')
(10, 29, 27, 2, ']')
(4, 30, 27, 2, '\n')
(1, 32, 35, 4, 'def')
(1, 36, 39, 4, 'foo')
(7, 39, 39, 4, '(')
(1, 41, 46, 4, 'value')
(11, 46, 46, 4, ':')
(1, 48, 51, 4, 'int')
(8, 52, 52, 4, ')')
(50, 54, 54, 4, '->')
(1, 57, 60, 4, 'int')
(11, 60, 60, 4, ':')
(4, 61, 60, 4, '\n')
(5, 62, 66, 5, None)
(1, 66, 72, 5, 'return')
(1, 73, 78, 5, 'value')
(14, 79, 79, 5, '+')
(2, 81, 82, 5, '2')
(4, 82, 82, 5, '\n')
(6, 84, 84, 7, None)
(0, 84, 84, 7, None)
file_input()
stmt()
simple_stmt()
small_stmt()
expr_stmt()
testlist_star_expr()
test()
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
NAME('print')
trailer()
'('('(')
arglist()
argument()
test()
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
NUMBER('2')
'+'('+')
term()
factor()
power()
atom()
NUMBER('2')
')'(')')
NEWLINE('\n')
stmt()
simple_stmt()
small_stmt()
expr_stmt()
testlist_star_expr()
test()
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
NAME('W')
'='('=')
testlist_star_expr()
test()
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
'['('[')
testlist_comp()
test()
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
NUMBER('2')
comp_for()
'for'('for')
exprlist()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
NAME('x')
'in'('in')
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
'['('[')
']'(']')
']'(']')
NEWLINE('\n')
stmt()
compound_stmt()
funcdef()
'def'('def')
NAME('foo')
parameters()
'('('(')
typedargslist()
tfpdef()
NAME('value')
':'(':')
test()
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
NAME('int')
')'(')')
'->'('->')
test()
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
NAME('int')
':'(':')
suite()
NEWLINE('\n')
INDENT()
stmt()
simple_stmt()
small_stmt()
flow_stmt()
return_stmt()
'return'('return')
testlist()
test()
or_test()
and_test()
not_test()
comparison()
expr()
xor_expr()
and_expr()
shift_expr()
arith_expr()
term()
factor()
power()
atom()
NAME('value')
'+'('+')
term()
factor()
power()
atom()
NUMBER('2')
NEWLINE('\n')
DEDENT()
ENDMARKER()
Released under the MIT license. See LICENSE.md.