-
Notifications
You must be signed in to change notification settings - Fork 0
/
basic.py
229 lines (196 loc) · 7.96 KB
/
basic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
import re
class Token:
    """A single lexical token: a type tag paired with a value.

    Both fields are stored as given and exposed through ``getType`` and
    ``getValue``.
    """

    def __init__(self, type, value):
        # ``type`` shadows the builtin inside this method only; the parameter
        # name is kept so callers passing it by keyword keep working.
        self._type = type
        self._value = value

    def __repr__(self):
        """Return a debugging representation such as ``Token('int', '5')``."""
        return "{}({!r}, {!r})".format(
            self.__class__.__name__, self._type, self._value
        )

    def getType(self):
        """Return the token's type tag."""
        return self._type

    def getValue(self):
        """Return the token's value."""
        return self._value
def isKeyword(word):
    """Tell whether *word* is one of the reserved keywords.

    The comparison is exact and case-sensitive.
    """
    reserved = ("is", "print", "if", "else", "elif")
    return word in reserved
def isDatatype(word):
    """Report whether *word* names one of the supported data types.

    The comparison is exact and case-sensitive.
    """
    return word in ("int", "float", "string", "boolean", "char")
def isNumber(num):
    """Return True if *num* is an optionally signed decimal number.

    Accepted forms are ``digits`` or ``digits.digits`` with an optional
    leading ``+`` or ``-`` (for example ``"42"``, ``"-3.14"``). A bare
    leading or trailing dot (``".5"``, ``"5."``) is rejected.

    Args:
        num: The candidate text.

    Returns:
        bool: True when the whole string is a valid number, else False.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which let "12\n" through.
    # [0-9] instead of \d: \d matches any Unicode decimal digit, which is
    # not valid in a numeric literal.
    return re.fullmatch(r'[+-]?[0-9]+(\.[0-9]+)?', num) is not None
def isBoolean(word):
    """Tell whether *word* is a boolean literal.

    Only the upper-case spellings ``TRUE`` and ``FALSE`` are recognised.
    """
    literals = ("TRUE", "FALSE")
    return word in literals
def isCharacter(c):
    """Tell whether *c* is a character literal: one letter in single quotes.

    The text must be exactly three characters long, e.g. ``'a'``; the quoted
    character must satisfy ``str.isalpha``.
    """
    if len(c) != 3:
        return False
    opening, middle, closing = c[0], c[1], c[2]
    return opening == "'" and middle.isalpha() and closing == "'"
def isString(word):
    """Return True if *word* is text enclosed in a pair of double quotes.

    The empty literal ``""`` counts as a string. Text shorter than two
    characters never does.

    Args:
        word: The candidate text, including its surrounding quotes.

    Returns:
        bool: True when *word* starts and ends with distinct ``"`` characters.
    """
    # The length guard avoids an IndexError on "" and stops a lone quote
    # from being accepted because it is both the first and last character.
    return len(word) >= 2 and word[0] == '"' and word[-1] == '"'
def isVariableName(var):
    """Return True if *var* is a valid variable name.

    A valid name starts with an ASCII letter, continues with ASCII letters,
    digits or underscores, and is at most 32 characters long.

    Args:
        var: The candidate name.

    Returns:
        bool: True when *var* satisfies the rules above, else False.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which let names such as "abc\n" through.
    return (
        len(var) <= 32
        and re.fullmatch(r'[a-zA-Z][a-zA-Z0-9_]*', var) is not None
    )
def isMathOperators(c):
    """Tell whether *c* is an arithmetic operator or a parenthesis.

    Recognised symbols: ``+ - * / % ^ ( )``.
    """
    symbols = ("+", "-", "*", "/", "%", "^", "(", ")")
    return c in symbols
def getMathOperator(c):
    """Map an arithmetic symbol or parenthesis to its token-type name.

    Returns None when *c* is not one of the recognised symbols.
    """
    names_by_symbol = {
        "+": "add_operator",
        "-": "minus_operator",
        "*": "mult_operator",
        "/": "div_operator",
        "%": "mod_operator",
        "^": "exponent",
        "(": "open_par",
        ")": "close_par",
    }
    return names_by_symbol.get(c)
def isConditionalOperator(c):
    """Tell whether *c* is a character that can start or extend a
    comparison/assignment operator: ``=``, ``!``, ``<`` or ``>``.
    """
    operator_chars = ("=", "!", "<", ">")
    return c in operator_chars
def getConditionalOperator(cc):
    """Map an assignment/comparison operator to its token-type name.

    Returns None when *cc* is not one of the recognised one- or
    two-character operators.
    """
    names_by_operator = {
        "=": "equals",
        "==": "equalto",
        "!": "not",
        "!=": "notequalto",
        ">": "greaterthan",
        ">=": "greaterthanorequalto",
        "<": "lessthan",
        "<=": "lessthanorequalto",
    }
    return names_by_operator.get(cc)
def getNumericalDatatype(num):
    """Return ``"int"`` or ``"float"`` for the numeric text *num*.

    Args:
        num: Text of a number, e.g. ``"12"`` or ``"3.14"``.

    Returns:
        str: ``"int"`` if *num* parses as an integer, otherwise ``"float"``.

    Raises:
        ValueError: If *num* is neither an integer nor a float.
    """
    # Parse with int()/float() rather than eval(): eval() raises SyntaxError
    # on leading-zero integers such as "007", and would execute arbitrary
    # code if it were ever handed unvalidated text.
    try:
        int(num)
    except ValueError:
        float(num)  # propagates ValueError when num is not numeric at all
        return "float"
    return "int"
# Number of consecutive leading spaces that make up one indentation level.
_INDENT_WIDTH = 4


def _tokenize_line(line):
    """Tokenize one source line and return its tokens as a list.

    Scanning stops at the first lexical error; the ``*_ERROR`` token that
    describes it is then the last element of the returned list. A line that
    yields no tokens at all produces a single ``newline`` token.
    """
    line_tokens = []
    length = len(line)
    # End of the run of spaces that opens the line. Only this region may
    # produce 'indentation' tokens; four spaces later in the line are
    # ordinary separators.
    indent_end = length - len(line.lstrip(" "))
    pos = 0

    while pos < length:
        ch = line[pos]
        start = pos

        if ch.isdigit():
            # Number: take every digit and dot, then validate the lexeme.
            while pos < length and (line[pos].isdigit() or line[pos] == "."):
                pos += 1
            text = line[start:pos]
            if not isNumber(text):
                line_tokens.append(Token('NUMBER_ERROR', text))
                break
            line_tokens.append(Token(getNumericalDatatype(text), text))

        elif ch == '"' or ch == "'":
            # Quoted literal: double quotes delimit a string, single quotes
            # a character. Both are scanned up to the next matching quote.
            if ch == '"':
                ok_type, error_type, is_valid = 'string', 'STRING_ERROR', isString
            else:
                ok_type, error_type, is_valid = 'char', 'CHARACTER_ERROR', isCharacter
            closing = line.find(ch, pos + 1)
            if closing == -1:
                # Unterminated literal: report everything from the opening
                # quote to the end of the line.
                line_tokens.append(Token(error_type, line[start:]))
                break
            text = line[start:closing + 1]
            if not is_valid(text):
                line_tokens.append(Token(error_type, text))
                break
            # The token value excludes the surrounding quotes.
            line_tokens.append(Token(ok_type, text[1:-1]))
            pos = closing + 1

        elif ch.isalpha():
            # Word: letters, digits and underscores.
            while pos < length and (
                line[pos].isalpha() or line[pos] == "_" or line[pos].isdigit()
            ):
                pos += 1
            word = line[start:pos]
            if isKeyword(word):
                line_tokens.append(Token('keyword', word))
            elif isDatatype(word):
                line_tokens.append(Token('datatype', word))
            elif isBoolean(word):
                line_tokens.append(Token('boolean', word))
            elif isVariableName(word.lower()):
                # Variable names are stored lower-cased.
                line_tokens.append(Token('variable', word.lower()))
            else:
                line_tokens.append(Token('NAME_ERROR', word))
                break

        elif isConditionalOperator(ch):
            # Greedily collect operator characters so "==", "!=", "<=" and
            # ">=" are recognised as single tokens.
            while pos < length and isConditionalOperator(line[pos]):
                pos += 1
            symbol = line[start:pos]
            token_type = getConditionalOperator(symbol)
            if token_type is None:
                line_tokens.append(Token("CONDITIONAL_ERROR", symbol))
                break
            line_tokens.append(Token(token_type, symbol))

        elif pos + _INDENT_WIDTH <= indent_end:
            # A full indentation level inside the leading run of spaces.
            line_tokens.append(Token('indentation', ' '))
            pos += _INDENT_WIDTH

        elif isMathOperators(ch):
            line_tokens.append(Token(getMathOperator(ch), ch))
            pos += 1

        elif ch == " ":
            # Separator space: skip it.
            pos += 1

        else:
            # Unrecognised character: report it so the error is actionable.
            line_tokens.append(Token('CODE_ERROR', ch))
            break

    if not line_tokens:
        line_tokens.append(Token('newline', '\n'))
    return line_tokens


def lexer(linesOfCode):
    """Convert lines of source text into lists of tokens.

    Args:
        linesOfCode: Iterable of source lines (strings).

    Returns:
        list: One inner list of ``Token`` objects per input line, in order.
        A line with no tokens is represented by a single ``newline`` token.
        A line containing a lexical error ends with a token whose type is
        one of ``NUMBER_ERROR``, ``STRING_ERROR``, ``CHARACTER_ERROR``,
        ``NAME_ERROR``, ``CONDITIONAL_ERROR`` or ``CODE_ERROR``; the rest
        of that line is not scanned.
    """
    return [_tokenize_line(line) for line in linesOfCode]