-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
83 lines (65 loc) · 2.39 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import nltk
from nltk import word_tokenize, CFG
import sys
import re
import StatementCollectorExtention as sce
from Interpreter import Interpreter
def parse_input(parser, tokens, debug=False):
    """Return True if *tokens* is accepted by *parser*, else False.

    Args:
        parser: an nltk chart parser (or any object with a ``parse(tokens)``
            method yielding parse trees); its private ``_trace`` attribute is
            toggled when *debug* is on.
        tokens: the token sequence to parse.
        debug: when True, enable parser tracing and print/pretty-print the
            first parse tree found.

    Returns:
        bool: True as soon as one parse tree is produced, False if the
        iterator is empty or the parser rejects the input.
    """
    try:
        if debug:
            parser._trace = 3
        for tree in parser.parse(tokens):
            if debug:
                print(tree)
                tree.pretty_print()
            # One successful tree is enough to accept the input.
            return True
    except ValueError:
        # nltk chart parsers raise ValueError when a token is not covered by
        # the grammar. The original caught nltk.EarleyChartParser.NotParseable,
        # which does not exist and would itself raise AttributeError the first
        # time a parse failure occurred.
        pass
    finally:
        # Always restore tracing, even on early return or parse failure.
        if debug:
            parser._trace = 0
    return False
if __name__ == "__main__":
    # --- Debug flag -------------------------------------------------------
    debug_str = "False"  # input("Debugger aktivieren? (True/False) ")
    # fullmatch: the original re.match only anchored the start, so inputs
    # like "FalseX" were accepted.
    while not re.fullmatch(r"True|False", debug_str):
        debug_str = input("Ungültige Eingabe! Debugger aktivieren? ")
    # Compare the text: bool("False") is True because any non-empty string
    # is truthy, which made the prompt answer irrelevant.
    debug = debug_str == "True"

    # --- Source file ------------------------------------------------------
    path = input("Welche Datei soll interpretiert werden? ")
    if not path:
        path = "./examples/list.while"
    if not path.endswith(".while"):
        print(f"Unter '{path}' ist keine Quellcode Datei.")
        sys.exit(-1)
    # Context manager closes the file handle (the original leaked it).
    with open(path, encoding="utf-8", mode="r") as source_file:
        source_code = source_file.read()

    # --- Lex to tokens ----------------------------------------------------
    tokenize_source = word_tokenize(source_code)
    keywords = ["=", "!", "do", "while", "end", "print", ";"]
    # Normalize non-keyword tokens to the terminals used by the grammar:
    # digits become 'num', lowercase identifiers become 'var'.
    for index, token in enumerate(tokenize_source):
        if token not in keywords:
            if re.match("([0-9])", token):
                tokenize_source[index] = 'num'
            elif re.match("([a-z])", token):
                tokenize_source[index] = 'var'

    # --- Grammar of the WHILE language -----------------------------------
    grammar = CFG.fromstring("""
programm -> 'var' '=' 'var' operator 'num'
programm -> 'var' '+=' 'num'
programm -> programm ';' programm
programm -> 'while' 'var' '!' '=' 'num' 'do' programm 'end'
programm -> 'print' 'var'
operator -> '+' | '-'
""")
    # Build a parser from the grammar and check the token stream.
    parser = nltk.EarleyChartParser(grammar=grammar)
    parsed = parse_input(parser, tokenize_source, debug)
    print(f"parse: {parsed}")
    if not parsed:
        print("Syntax Error!")
    else:
        # Re-tokenize so the interpreter sees the original (un-normalized)
        # tokens, then translate the statements into Python source.
        statements = sce.StatementCollectorExtention(word_tokenize(source_code)).summarize_statements()
        generate_code = Interpreter(statements).generate_code()
        # code pre-view
        print(f"\n{generate_code.strip()}\n\nProgramm output:\n")
        # SECURITY: exec runs generated code derived from a user-chosen file;
        # only interpret .while sources you trust.
        exec(generate_code)