Skip to content

Commit

Permalink
Codes
Browse files Browse the repository at this point in the history
  • Loading branch information
nickinack authored Apr 18, 2021
1 parent 9e3b46f commit cfa06f9
Show file tree
Hide file tree
Showing 4 changed files with 857 additions and 0 deletions.
348 changes: 348 additions & 0 deletions q1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,348 @@
import numpy as np
import json
import sys

# Get the input of the regular expression
inpfile = sys.argv[1]
with open(inpfile, 'r') as fp:
regexp = json.load(fp)
inp = regexp["regex"]
fname = sys.argv[2]
# Add concatenation operators
regex = [inp[0]]
for i in range(1, len(inp)):
if (regex[-1] not in ['.', '+', '(']) and (inp[i]
not in ['.', '+', '*', ')']):
regex.append('.')
regex.append(inp[i])

# Declare all possible operations
union = '+'
concat = '.'
closure = '*'


def precedence(operator):
precedence_map = {"(": 0, ".": 3, "*": 4, "+": 2}
if operator not in ["(", ".", "*", "+"]:
return 4
return precedence_map[operator]


def infix_to_postfix(regex):
# Use shunting yard's algorithm to convert infix notation to postfix
'''
Precendence:
1. Paranthesis
2. Kleene Star
3. Concatenation
4. Union
'''
output_queue = []
operator_stack = []
i = 0
while i < len(regex):
val = regex[i]
if val == "(":
operator_stack.append(val)
elif val == ")":
while operator_stack:
if operator_stack[-1] == "(":
break
output_queue.append(operator_stack[-1])
operator_stack.pop()
operator_stack.pop()
else:
while operator_stack:
if precedence(operator_stack[-1]) >= precedence(val):
output_queue.append(operator_stack[-1])
operator_stack.pop()
else:
break
operator_stack.append(val)

i = i + 1

while operator_stack:
output_queue.append(operator_stack[-1])
operator_stack.pop()

return output_queue


def remove_instance(start_state, inputs, next_state, transition):
for i in range(0, len(transition)):
if (transition[i]["start_state"] == start_state
and transition[i]["input"] == inputs
and transition[i]["next_state"] == next_state):
# Remove the transition instance
transition.pop(i)
break


def exists_ephsilon(state1, state2, transition):
for i in range(0, len(transition)):
if transition[i]["start_state"] == state1 and transition[i][
"next_state"] == state2 and transition[i]["input"] == '$':
return 1
return 0


def thomopson(output_queue):
# Use thompson's algorithm to convert infix notation to postfix
# let us try to convert regex to non-finite automata
# in order to do so, we have to append the final starts of previous states
# Declare all variables
start_state = ''
final_states = []
alphabets = []
transition = []
state_set = []

# Find the total number of alphabets in the regex
for i in range(0, len(regex)):
if (regex[i]
not in alphabets) and ((regex[i] >= '0' and regex[i] <= '9') or
(regex[i] >= 'a' and regex[i] <= 'z')):
alphabets.append(regex[i])

# take care of corner cases first
if len(output_queue) == 1:
if output_queue[0] == "$":
transition.append({
"start_state": 'q0',
"input": '$',
"next_state": 'q1'
})
start_state = 'q0'
final_states.append('q1')
elif output_queue[0] == "/":
start_state = 'q0'
elif (output_queue[0] >= '0'
and output_queue[0] <= '9') or (output_queue[0] >= 'a'
and output_queue[0] <= 'z'):
val1 = output_queue[0]
transition.append({
"start_state": 'q' + '0',
"input": val1,
"next_state": 'q' + '1'
})
start_state = 'q0'
final_states.append('q1')
return state_set, start_state, final_states, alphabets, transition

track_dict = {}
thompson_stack = []
j = 0
i = 0
while i < len(output_queue):
if (output_queue[i] >= '0' and output_queue[i] <= '9') or (
output_queue[i] >= 'a'
and output_queue[i] <= 'z') or (output_queue[i] == '$'):
thompson_stack.append(output_queue[i])
if output_queue[i] == "+":
val1 = thompson_stack.pop()
val2 = thompson_stack.pop()
new_str = "new" + str(i)
final = []
state_set.append('q' + str(j) + '4')
if len(val1) == 1:
transition.append({
"start_state": 'q' + str(j) + '0',
"input": val1,
"next_state": 'q' + str(j) + '1'
})
transition.append({
"start_state": 'q' + str(j) + '4',
"input": '$',
"next_state": 'q' + str(j) + '0'
})
state_set.append('q' + str(j) + '0')
state_set.append('q' + str(j) + '1')
final.append('q' + str(j) + '1')
if len(val2) == 1:
transition.append({
"start_state": 'q' + str(j) + '2',
"input": val2,
"next_state": 'q' + str(j) + '3'
})
transition.append({
"start_state": 'q' + str(j) + '4',
"input": '$',
"next_state": 'q' + str(j) + '2'
})
state_set.append('q' + str(j) + '2')
state_set.append('q' + str(j) + '3')
final.append('q' + str(j) + '3')
if not len(val1) == 1:
transition.append({
"start_state": 'q' + str(j) + '4',
"input": '$',
"next_state": track_dict[val1]["start_state"]
})
for k in range(0, len(track_dict[val1]["final_states"])):
final.append(track_dict[val1]["final_states"][k])
if not len(val2) == 1:
transition.append({
"start_state": 'q' + str(j) + '4',
"input": '$',
"next_state": track_dict[val2]["start_state"]
})
for k in range(0, len(track_dict[val2]["final_states"])):
final.append(track_dict[val2]["final_states"][k])
track_dict[new_str] = {
"start_state": 'q' + str(j) + '4',
"final_states": final
}
thompson_stack.append(new_str)

j += 1

if output_queue[i] == "*":
val1 = thompson_stack.pop()
new_str = "new" + str(i)
final = []
start = ''
if len(val1) == 1:
transition.append({
"start_state": 'q' + str(j) + '0',
"input": val1,
"next_state": 'q' + str(j) + '1'
})
transition.append({
"start_state": 'q' + str(j) + '1',
"input": '$',
"next_state": 'q' + str(j) + '0'
})
transition.append({
"start_state": 'q' + str(j) + '2',
"input": '$',
"next_state": 'q' + str(j) + '0'
})
state_set.append('q' + str(j) + '0')
state_set.append('q' + str(j) + '1')
state_set.append('q' + str(j) + '2')
final.append('q' + str(j) + '2')
final.append('q' + str(j) + '1')
start = 'q' + str(j) + '2'
if not len(val1) == 1:
for k in range(0, len(track_dict[val1]["final_states"])):
transition.append({
"start_state":
track_dict[val1]["final_states"][k],
"input":
'$',
"next_state":
track_dict[val1]["start_state"]
})
final.append(track_dict[val1]["final_states"][k])
transition.append({
"start_state": 'q' + str(j) + '0',
"input": '$',
"next_state": track_dict[val1]["start_state"]
})
state_set.append('q' + str(j) + '0')
start = 'q' + str(j) + '0'
final.append('q' + str(j) + '0')
track_dict[new_str] = {"start_state": start, "final_states": final}
thompson_stack.append(new_str)

j += 1

if output_queue[i] == ".":
val2 = thompson_stack.pop()
val1 = thompson_stack.pop()
final = []
start = ''
new_str = "new" + str(i)
if len(val1) == 1:
transition.append({
"start_state": 'q' + str(j) + '0',
"input": val1,
"next_state": 'q' + str(j) + '1'
})
state_set.append('q' + str(j) + '0')
state_set.append('q' + str(j) + '1')
if len(val2) == 1:
transition.append({
"start_state": 'q' + str(j) + '2',
"input": val2,
"next_state": 'q' + str(j) + '3'
})
state_set.append('q' + str(j) + '2')
state_set.append('q' + str(j) + '3')

if len(val2) == 1 and (not len(val1) == 1):
for k in range(0, len(track_dict[val1]["final_states"])):
transition.append({
"start_state":
track_dict[val1]["final_states"][k],
"input":
'$',
"next_state":
'q' + str(j) + '2'
})
final.append('q' + str(j) + '3')
start = track_dict[val1]["start_state"]
elif len(val2) == 1 and (len(val1) == 1):
transition.append({
"start_state": 'q' + str(j) + '1',
"input": '$',
"next_state": 'q' + str(j) + '2'
})
final.append('q' + str(j) + '3')
start = 'q' + str(j) + '0'
elif (not len(val2) == 1) and (len(val1) == 1):
transition.append({
"start_state": 'q' + str(j) + '1',
"input": '$',
"next_state": track_dict[val2]["start_state"]
})
for k in range(0, len(track_dict[val2]["final_states"])):
final.append(track_dict[val2]["final_states"][k])
start = 'q' + str(j) + '0'
elif (not len(val2) == 1) and (not len(val1) == 1):
for k in range(0, len(track_dict[val1]["final_states"])):
transition.append({
"start_state":
track_dict[val1]["final_states"][k],
"input":
'$',
"next_state":
track_dict[val2]["start_state"]
})
for k in range(0, len(track_dict[val2]["final_states"])):
final.append(track_dict[val2]["final_states"][k])
start = track_dict[val1]["start_state"]
track_dict[new_str] = {"start_state": start, "final_states": final}
thompson_stack.append(new_str)

j += 1
i += 1
assign = thompson_stack.pop()
start_state = track_dict[assign]["start_state"]
final_states = track_dict[assign]["final_states"]
return state_set, start_state, final_states, alphabets, transition


output_queue = infix_to_postfix(regex)
state_set, start_state, final_states, alphabets, transition = thomopson(
output_queue)

transition_function = []
start_states = [start_state]
for i in range(0, len(transition)):
transition_function.append([
transition[i]["start_state"], transition[i]["input"],
transition[i]["next_state"]
])

answer = {
"states": state_set,
"letters": alphabets,
"transition_function": transition_function,
"start_states": start_states,
"final_states": final_states
}
with open(fname, 'w') as fp:
json.dump(answer, fp, indent=4)
Loading

0 comments on commit cfa06f9

Please sign in to comment.