-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
857 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,348 @@ | ||
import numpy as np | ||
import json | ||
import sys | ||
|
||
# Get the input of the regular expression | ||
inpfile = sys.argv[1] | ||
with open(inpfile, 'r') as fp: | ||
regexp = json.load(fp) | ||
inp = regexp["regex"] | ||
fname = sys.argv[2] | ||
# Add concatenation operators | ||
regex = [inp[0]] | ||
for i in range(1, len(inp)): | ||
if (regex[-1] not in ['.', '+', '(']) and (inp[i] | ||
not in ['.', '+', '*', ')']): | ||
regex.append('.') | ||
regex.append(inp[i]) | ||
|
||
# Declare all possible operations | ||
union = '+' | ||
concat = '.' | ||
closure = '*' | ||
|
||
|
||
def precedence(operator): | ||
precedence_map = {"(": 0, ".": 3, "*": 4, "+": 2} | ||
if operator not in ["(", ".", "*", "+"]: | ||
return 4 | ||
return precedence_map[operator] | ||
|
||
|
||
def infix_to_postfix(regex): | ||
# Use shunting yard's algorithm to convert infix notation to postfix | ||
''' | ||
Precendence: | ||
1. Paranthesis | ||
2. Kleene Star | ||
3. Concatenation | ||
4. Union | ||
''' | ||
output_queue = [] | ||
operator_stack = [] | ||
i = 0 | ||
while i < len(regex): | ||
val = regex[i] | ||
if val == "(": | ||
operator_stack.append(val) | ||
elif val == ")": | ||
while operator_stack: | ||
if operator_stack[-1] == "(": | ||
break | ||
output_queue.append(operator_stack[-1]) | ||
operator_stack.pop() | ||
operator_stack.pop() | ||
else: | ||
while operator_stack: | ||
if precedence(operator_stack[-1]) >= precedence(val): | ||
output_queue.append(operator_stack[-1]) | ||
operator_stack.pop() | ||
else: | ||
break | ||
operator_stack.append(val) | ||
|
||
i = i + 1 | ||
|
||
while operator_stack: | ||
output_queue.append(operator_stack[-1]) | ||
operator_stack.pop() | ||
|
||
return output_queue | ||
|
||
|
||
def remove_instance(start_state, inputs, next_state, transition): | ||
for i in range(0, len(transition)): | ||
if (transition[i]["start_state"] == start_state | ||
and transition[i]["input"] == inputs | ||
and transition[i]["next_state"] == next_state): | ||
# Remove the transition instance | ||
transition.pop(i) | ||
break | ||
|
||
|
||
def exists_ephsilon(state1, state2, transition): | ||
for i in range(0, len(transition)): | ||
if transition[i]["start_state"] == state1 and transition[i][ | ||
"next_state"] == state2 and transition[i]["input"] == '$': | ||
return 1 | ||
return 0 | ||
|
||
|
||
def thomopson(output_queue): | ||
# Use thompson's algorithm to convert infix notation to postfix | ||
# let us try to convert regex to non-finite automata | ||
# in order to do so, we have to append the final starts of previous states | ||
# Declare all variables | ||
start_state = '' | ||
final_states = [] | ||
alphabets = [] | ||
transition = [] | ||
state_set = [] | ||
|
||
# Find the total number of alphabets in the regex | ||
for i in range(0, len(regex)): | ||
if (regex[i] | ||
not in alphabets) and ((regex[i] >= '0' and regex[i] <= '9') or | ||
(regex[i] >= 'a' and regex[i] <= 'z')): | ||
alphabets.append(regex[i]) | ||
|
||
# take care of corner cases first | ||
if len(output_queue) == 1: | ||
if output_queue[0] == "$": | ||
transition.append({ | ||
"start_state": 'q0', | ||
"input": '$', | ||
"next_state": 'q1' | ||
}) | ||
start_state = 'q0' | ||
final_states.append('q1') | ||
elif output_queue[0] == "/": | ||
start_state = 'q0' | ||
elif (output_queue[0] >= '0' | ||
and output_queue[0] <= '9') or (output_queue[0] >= 'a' | ||
and output_queue[0] <= 'z'): | ||
val1 = output_queue[0] | ||
transition.append({ | ||
"start_state": 'q' + '0', | ||
"input": val1, | ||
"next_state": 'q' + '1' | ||
}) | ||
start_state = 'q0' | ||
final_states.append('q1') | ||
return state_set, start_state, final_states, alphabets, transition | ||
|
||
track_dict = {} | ||
thompson_stack = [] | ||
j = 0 | ||
i = 0 | ||
while i < len(output_queue): | ||
if (output_queue[i] >= '0' and output_queue[i] <= '9') or ( | ||
output_queue[i] >= 'a' | ||
and output_queue[i] <= 'z') or (output_queue[i] == '$'): | ||
thompson_stack.append(output_queue[i]) | ||
if output_queue[i] == "+": | ||
val1 = thompson_stack.pop() | ||
val2 = thompson_stack.pop() | ||
new_str = "new" + str(i) | ||
final = [] | ||
state_set.append('q' + str(j) + '4') | ||
if len(val1) == 1: | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '0', | ||
"input": val1, | ||
"next_state": 'q' + str(j) + '1' | ||
}) | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '4', | ||
"input": '$', | ||
"next_state": 'q' + str(j) + '0' | ||
}) | ||
state_set.append('q' + str(j) + '0') | ||
state_set.append('q' + str(j) + '1') | ||
final.append('q' + str(j) + '1') | ||
if len(val2) == 1: | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '2', | ||
"input": val2, | ||
"next_state": 'q' + str(j) + '3' | ||
}) | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '4', | ||
"input": '$', | ||
"next_state": 'q' + str(j) + '2' | ||
}) | ||
state_set.append('q' + str(j) + '2') | ||
state_set.append('q' + str(j) + '3') | ||
final.append('q' + str(j) + '3') | ||
if not len(val1) == 1: | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '4', | ||
"input": '$', | ||
"next_state": track_dict[val1]["start_state"] | ||
}) | ||
for k in range(0, len(track_dict[val1]["final_states"])): | ||
final.append(track_dict[val1]["final_states"][k]) | ||
if not len(val2) == 1: | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '4', | ||
"input": '$', | ||
"next_state": track_dict[val2]["start_state"] | ||
}) | ||
for k in range(0, len(track_dict[val2]["final_states"])): | ||
final.append(track_dict[val2]["final_states"][k]) | ||
track_dict[new_str] = { | ||
"start_state": 'q' + str(j) + '4', | ||
"final_states": final | ||
} | ||
thompson_stack.append(new_str) | ||
|
||
j += 1 | ||
|
||
if output_queue[i] == "*": | ||
val1 = thompson_stack.pop() | ||
new_str = "new" + str(i) | ||
final = [] | ||
start = '' | ||
if len(val1) == 1: | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '0', | ||
"input": val1, | ||
"next_state": 'q' + str(j) + '1' | ||
}) | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '1', | ||
"input": '$', | ||
"next_state": 'q' + str(j) + '0' | ||
}) | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '2', | ||
"input": '$', | ||
"next_state": 'q' + str(j) + '0' | ||
}) | ||
state_set.append('q' + str(j) + '0') | ||
state_set.append('q' + str(j) + '1') | ||
state_set.append('q' + str(j) + '2') | ||
final.append('q' + str(j) + '2') | ||
final.append('q' + str(j) + '1') | ||
start = 'q' + str(j) + '2' | ||
if not len(val1) == 1: | ||
for k in range(0, len(track_dict[val1]["final_states"])): | ||
transition.append({ | ||
"start_state": | ||
track_dict[val1]["final_states"][k], | ||
"input": | ||
'$', | ||
"next_state": | ||
track_dict[val1]["start_state"] | ||
}) | ||
final.append(track_dict[val1]["final_states"][k]) | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '0', | ||
"input": '$', | ||
"next_state": track_dict[val1]["start_state"] | ||
}) | ||
state_set.append('q' + str(j) + '0') | ||
start = 'q' + str(j) + '0' | ||
final.append('q' + str(j) + '0') | ||
track_dict[new_str] = {"start_state": start, "final_states": final} | ||
thompson_stack.append(new_str) | ||
|
||
j += 1 | ||
|
||
if output_queue[i] == ".": | ||
val2 = thompson_stack.pop() | ||
val1 = thompson_stack.pop() | ||
final = [] | ||
start = '' | ||
new_str = "new" + str(i) | ||
if len(val1) == 1: | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '0', | ||
"input": val1, | ||
"next_state": 'q' + str(j) + '1' | ||
}) | ||
state_set.append('q' + str(j) + '0') | ||
state_set.append('q' + str(j) + '1') | ||
if len(val2) == 1: | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '2', | ||
"input": val2, | ||
"next_state": 'q' + str(j) + '3' | ||
}) | ||
state_set.append('q' + str(j) + '2') | ||
state_set.append('q' + str(j) + '3') | ||
|
||
if len(val2) == 1 and (not len(val1) == 1): | ||
for k in range(0, len(track_dict[val1]["final_states"])): | ||
transition.append({ | ||
"start_state": | ||
track_dict[val1]["final_states"][k], | ||
"input": | ||
'$', | ||
"next_state": | ||
'q' + str(j) + '2' | ||
}) | ||
final.append('q' + str(j) + '3') | ||
start = track_dict[val1]["start_state"] | ||
elif len(val2) == 1 and (len(val1) == 1): | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '1', | ||
"input": '$', | ||
"next_state": 'q' + str(j) + '2' | ||
}) | ||
final.append('q' + str(j) + '3') | ||
start = 'q' + str(j) + '0' | ||
elif (not len(val2) == 1) and (len(val1) == 1): | ||
transition.append({ | ||
"start_state": 'q' + str(j) + '1', | ||
"input": '$', | ||
"next_state": track_dict[val2]["start_state"] | ||
}) | ||
for k in range(0, len(track_dict[val2]["final_states"])): | ||
final.append(track_dict[val2]["final_states"][k]) | ||
start = 'q' + str(j) + '0' | ||
elif (not len(val2) == 1) and (not len(val1) == 1): | ||
for k in range(0, len(track_dict[val1]["final_states"])): | ||
transition.append({ | ||
"start_state": | ||
track_dict[val1]["final_states"][k], | ||
"input": | ||
'$', | ||
"next_state": | ||
track_dict[val2]["start_state"] | ||
}) | ||
for k in range(0, len(track_dict[val2]["final_states"])): | ||
final.append(track_dict[val2]["final_states"][k]) | ||
start = track_dict[val1]["start_state"] | ||
track_dict[new_str] = {"start_state": start, "final_states": final} | ||
thompson_stack.append(new_str) | ||
|
||
j += 1 | ||
i += 1 | ||
assign = thompson_stack.pop() | ||
start_state = track_dict[assign]["start_state"] | ||
final_states = track_dict[assign]["final_states"] | ||
return state_set, start_state, final_states, alphabets, transition | ||
|
||
|
||
output_queue = infix_to_postfix(regex) | ||
state_set, start_state, final_states, alphabets, transition = thomopson( | ||
output_queue) | ||
|
||
transition_function = [] | ||
start_states = [start_state] | ||
for i in range(0, len(transition)): | ||
transition_function.append([ | ||
transition[i]["start_state"], transition[i]["input"], | ||
transition[i]["next_state"] | ||
]) | ||
|
||
answer = { | ||
"states": state_set, | ||
"letters": alphabets, | ||
"transition_function": transition_function, | ||
"start_states": start_states, | ||
"final_states": final_states | ||
} | ||
with open(fname, 'w') as fp: | ||
json.dump(answer, fp, indent=4) |
Oops, something went wrong.