def precedence(operator):
    """Return the binding strength of a token for the shunting-yard pass.

    '(' binds weakest (0), then union '+' (2), then concatenation '.' (3).
    The Kleene star '*' and every other token (operands included) share
    the highest level, 4.
    """
    if operator == "(":
        return 0
    if operator == "+":
        return 2
    if operator == ".":
        return 3
    # '*' and anything that is not an operator both rank highest.
    return 4
def exists_ephsilon(state1, state2, transition):
    """Report whether an epsilon ('$') edge leads from state1 to state2.

    ``transition`` is a list of dicts with the keys "start_state",
    "input" and "next_state". Returns 1 when a matching edge is present
    and 0 otherwise.
    """
    for edge in transition:
        if edge["start_state"] != state1:
            continue
        if edge["next_state"] != state2:
            continue
        if edge["input"] == '$':
            return 1
    return 0
final_states.append('q1') + elif output_queue[0] == "/": + start_state = 'q0' + elif (output_queue[0] >= '0' + and output_queue[0] <= '9') or (output_queue[0] >= 'a' + and output_queue[0] <= 'z'): + val1 = output_queue[0] + transition.append({ + "start_state": 'q' + '0', + "input": val1, + "next_state": 'q' + '1' + }) + start_state = 'q0' + final_states.append('q1') + return state_set, start_state, final_states, alphabets, transition + + track_dict = {} + thompson_stack = [] + j = 0 + i = 0 + while i < len(output_queue): + if (output_queue[i] >= '0' and output_queue[i] <= '9') or ( + output_queue[i] >= 'a' + and output_queue[i] <= 'z') or (output_queue[i] == '$'): + thompson_stack.append(output_queue[i]) + if output_queue[i] == "+": + val1 = thompson_stack.pop() + val2 = thompson_stack.pop() + new_str = "new" + str(i) + final = [] + state_set.append('q' + str(j) + '4') + if len(val1) == 1: + transition.append({ + "start_state": 'q' + str(j) + '0', + "input": val1, + "next_state": 'q' + str(j) + '1' + }) + transition.append({ + "start_state": 'q' + str(j) + '4', + "input": '$', + "next_state": 'q' + str(j) + '0' + }) + state_set.append('q' + str(j) + '0') + state_set.append('q' + str(j) + '1') + final.append('q' + str(j) + '1') + if len(val2) == 1: + transition.append({ + "start_state": 'q' + str(j) + '2', + "input": val2, + "next_state": 'q' + str(j) + '3' + }) + transition.append({ + "start_state": 'q' + str(j) + '4', + "input": '$', + "next_state": 'q' + str(j) + '2' + }) + state_set.append('q' + str(j) + '2') + state_set.append('q' + str(j) + '3') + final.append('q' + str(j) + '3') + if not len(val1) == 1: + transition.append({ + "start_state": 'q' + str(j) + '4', + "input": '$', + "next_state": track_dict[val1]["start_state"] + }) + for k in range(0, len(track_dict[val1]["final_states"])): + final.append(track_dict[val1]["final_states"][k]) + if not len(val2) == 1: + transition.append({ + "start_state": 'q' + str(j) + '4', + "input": '$', + "next_state": 
track_dict[val2]["start_state"] + }) + for k in range(0, len(track_dict[val2]["final_states"])): + final.append(track_dict[val2]["final_states"][k]) + track_dict[new_str] = { + "start_state": 'q' + str(j) + '4', + "final_states": final + } + thompson_stack.append(new_str) + + j += 1 + + if output_queue[i] == "*": + val1 = thompson_stack.pop() + new_str = "new" + str(i) + final = [] + start = '' + if len(val1) == 1: + transition.append({ + "start_state": 'q' + str(j) + '0', + "input": val1, + "next_state": 'q' + str(j) + '1' + }) + transition.append({ + "start_state": 'q' + str(j) + '1', + "input": '$', + "next_state": 'q' + str(j) + '0' + }) + transition.append({ + "start_state": 'q' + str(j) + '2', + "input": '$', + "next_state": 'q' + str(j) + '0' + }) + state_set.append('q' + str(j) + '0') + state_set.append('q' + str(j) + '1') + state_set.append('q' + str(j) + '2') + final.append('q' + str(j) + '2') + final.append('q' + str(j) + '1') + start = 'q' + str(j) + '2' + if not len(val1) == 1: + for k in range(0, len(track_dict[val1]["final_states"])): + transition.append({ + "start_state": + track_dict[val1]["final_states"][k], + "input": + '$', + "next_state": + track_dict[val1]["start_state"] + }) + final.append(track_dict[val1]["final_states"][k]) + transition.append({ + "start_state": 'q' + str(j) + '0', + "input": '$', + "next_state": track_dict[val1]["start_state"] + }) + state_set.append('q' + str(j) + '0') + start = 'q' + str(j) + '0' + final.append('q' + str(j) + '0') + track_dict[new_str] = {"start_state": start, "final_states": final} + thompson_stack.append(new_str) + + j += 1 + + if output_queue[i] == ".": + val2 = thompson_stack.pop() + val1 = thompson_stack.pop() + final = [] + start = '' + new_str = "new" + str(i) + if len(val1) == 1: + transition.append({ + "start_state": 'q' + str(j) + '0', + "input": val1, + "next_state": 'q' + str(j) + '1' + }) + state_set.append('q' + str(j) + '0') + state_set.append('q' + str(j) + '1') + if len(val2) == 1: + 
transition.append({ + "start_state": 'q' + str(j) + '2', + "input": val2, + "next_state": 'q' + str(j) + '3' + }) + state_set.append('q' + str(j) + '2') + state_set.append('q' + str(j) + '3') + + if len(val2) == 1 and (not len(val1) == 1): + for k in range(0, len(track_dict[val1]["final_states"])): + transition.append({ + "start_state": + track_dict[val1]["final_states"][k], + "input": + '$', + "next_state": + 'q' + str(j) + '2' + }) + final.append('q' + str(j) + '3') + start = track_dict[val1]["start_state"] + elif len(val2) == 1 and (len(val1) == 1): + transition.append({ + "start_state": 'q' + str(j) + '1', + "input": '$', + "next_state": 'q' + str(j) + '2' + }) + final.append('q' + str(j) + '3') + start = 'q' + str(j) + '0' + elif (not len(val2) == 1) and (len(val1) == 1): + transition.append({ + "start_state": 'q' + str(j) + '1', + "input": '$', + "next_state": track_dict[val2]["start_state"] + }) + for k in range(0, len(track_dict[val2]["final_states"])): + final.append(track_dict[val2]["final_states"][k]) + start = 'q' + str(j) + '0' + elif (not len(val2) == 1) and (not len(val1) == 1): + for k in range(0, len(track_dict[val1]["final_states"])): + transition.append({ + "start_state": + track_dict[val1]["final_states"][k], + "input": + '$', + "next_state": + track_dict[val2]["start_state"] + }) + for k in range(0, len(track_dict[val2]["final_states"])): + final.append(track_dict[val2]["final_states"][k]) + start = track_dict[val1]["start_state"] + track_dict[new_str] = {"start_state": start, "final_states": final} + thompson_stack.append(new_str) + + j += 1 + i += 1 + assign = thompson_stack.pop() + start_state = track_dict[assign]["start_state"] + final_states = track_dict[assign]["final_states"] + return state_set, start_state, final_states, alphabets, transition + + +output_queue = infix_to_postfix(regex) +state_set, start_state, final_states, alphabets, transition = thomopson( + output_queue) + +transition_function = [] +start_states = [start_state] +for 
def ephsilon_closure(state_set, transition):
    """Return the epsilon closure of ``state_set``.

    Args:
        state_set: iterable of NFA states to start from.
        transition: list of ``[source, symbol, target]`` entries; the
            symbol '$' marks an epsilon move.

    Returns:
        A new list holding the seed states (in their original order)
        followed by every state reachable from them through '$' moves
        alone, each listed once in discovery order.
    """
    # Plain list copies keep the state objects exactly as given. Going
    # through a NumPy array would coerce them to NumPy scalars, which
    # stringifies mixed-type names and yields values json cannot dump.
    closure = list(state_set)
    pending = list(state_set)
    while pending:
        current = pending.pop()
        for edge in transition:
            if edge[0] != current or edge[1] != '$':
                continue
            target = edge[2]
            if target not in closure:
                closure.append(target)
                pending.append(target)
    return closure
def construct_dfa(state_set, nfa, ephsilon_closure):
    """Assemble the DFA tuple for the given candidate DFA states.

    Args:
        state_set: list of DFA states, each a list of NFA states.
        nfa: dict with the keys "letters", "transition_function",
            "start_states" and "final_states".
        ephsilon_closure: mapping from an NFA state to its epsilon
            closure.

    Returns:
        ``(states, letters, transition, start_state, final_states)``.
        ``transition`` holds one ``[state, letter, next_states]`` entry
        per state/letter pair, and ``start_state`` is the epsilon
        closure of the first NFA start state.
    """
    # One row per (DFA state, letter): the targets are the closed set of
    # NFA states reachable from any member on that letter.
    transition = [
        [
            subset,
            letter,
            get_reachable_states(subset, letter,
                                 nfa["transition_function"],
                                 ephsilon_closure),
        ]
        for subset in state_set
        for letter in nfa["letters"]
    ]

    # A DFA state accepts as soon as it contains any accepting NFA state.
    final_states = [
        subset for subset in state_set
        if any(accepting in subset for accepting in nfa["final_states"])
    ]

    start_closure = ephsilon_closure[nfa["start_states"][0]]
    return state_set, nfa["letters"], transition, start_closure, final_states
def star(symbol):
    """Return the regular expression for the Kleene star of ``symbol``.

    '$' stands for the empty string and '∅' for the empty language.
    Both ε* and ∅* match exactly the empty string, so either input
    simplifies to '$'. Any other expression gets '*' appended as-is;
    no parentheses are added here.
    """
    if symbol in ('$', '∅'):
        # Zero repetitions are always allowed, so even ∅* contains ε.
        return '$'
    return symbol + '*'
def get_state_index(state, state_set):
    """Return the position of ``state`` in ``state_set``, or None.

    A state matches an entry when the two are equal. When both are
    collections (list, tuple, set or frozenset) they also match if they
    hold the same members in any order, so a composite state is found
    however its members are ordered.

    Scalar names are compared by equality only. Treating a string as a
    set of characters would make 'q1' match 'q11', and would raise on
    integer states.
    """
    collection_types = (list, tuple, set, frozenset)
    state_is_collection = isinstance(state, collection_types)
    for position, candidate in enumerate(state_set):
        if candidate == state:
            return position
        if (state_is_collection
                and isinstance(candidate, collection_types)
                and set(candidate) == set(state)):
            return position
    return None
def get_reachable_states(cur_state, l, transition):
    """Return the targets of every transition leaving ``cur_state`` on ``l``.

    Args:
        cur_state: the source state to look up.
        l: the input letter.
        transition: list of ``[source, letter, target]`` entries.

    Returns:
        The targets of all matching entries, in transition order.

    A source matches ``cur_state`` when the two are equal, or when both
    are collections (list, tuple, set or frozenset) with the same
    members in any order. Scalar names are compared by equality only,
    so 'q1' no longer matches 'q11' and integer states are accepted.
    """
    collection_types = (list, tuple, set, frozenset)
    cur_is_collection = isinstance(cur_state, collection_types)
    reachable_states = []
    for edge in transition:
        if edge[1] != l:
            continue
        source = edge[0]
        if source == cur_state or (
                cur_is_collection
                and isinstance(source, collection_types)
                and set(source) == set(cur_state)):
            reachable_states.append(edge[2])
    return reachable_states
def dfa_reduction(dfa):
    """Return a copy of ``dfa`` restricted to its reachable states.

    Args:
        dfa: dict with the keys "states", "letters",
            "transition_function", "start_states" and "final_states".
            Only the first start state is used.

    Returns:
        A new dict with the same keys. "states" lists the states
        reachable from the start state in discovery order,
        "transition_function" holds the outgoing transitions of those
        states (each recorded once), and "final_states" holds the
        reachable accepting states in the order they appear in
        ``dfa["states"]``.
    """
    state_set = dfa["states"]
    letters = dfa["letters"]
    start_state = dfa["start_states"][0]

    # Mark a state when it is first discovered rather than when it is
    # popped. That way it is queued and expanded exactly once, and its
    # transitions are not emitted again each time it is rediscovered.
    visited = [False] * len(state_set)
    visited[get_index_from_state_set(start_state, state_set)] = True
    pending = [start_state]
    new_state_set = [start_state]
    transition = []
    while pending:
        cur_state = pending.pop()
        for letter in letters:
            targets = get_reachable_states(
                cur_state, letter, dfa["transition_function"])
            for target in targets:
                transition.append([cur_state, letter, target])
                target_index = get_index_from_state_set(target, state_set)
                if not visited[target_index]:
                    visited[target_index] = True
                    pending.append(target)
                    new_state_set.append(target)

    # Keep only accepting states that survived the reachability pass;
    # an unreachable accepting state must not appear in the reduced DFA.
    final_states = [
        state for position, state in enumerate(state_set)
        if visited[position] and state in dfa["final_states"]
    ]

    new_dfa = {
        "states": new_state_set,
        "letters": letters,
        "transition_function": transition,
        "start_states": dfa["start_states"],
        "final_states": final_states
    }

    return new_dfa
def create_new_dfa(p, transition, old_dfa):
    """Build the DFA whose states are the blocks of partition ``p``.

    Args:
        p: list of blocks, each a list of states of the original DFA.
        transition: list of ``[source, letter, target]`` entries over
            the original states.
        old_dfa: the original DFA dict; its "start_states",
            "final_states" and "letters" entries are read.

    Returns:
        A dict with the keys "states", "letters", "transition_function",
        "start_states" and "final_states", all expressed over blocks.
    """
    # The first block holding the old start state is the new start state.
    start_block = next(
        (block for block in p if old_dfa["start_states"][0] in block),
        None)
    start_states = [] if start_block is None else [start_block]

    # A block accepts when it contains at least one old accepting state.
    final_states = []
    for block in p:
        holds_final = any(old in block for old in old_dfa["final_states"])
        if holds_final and block not in final_states:
            final_states.append(block)

    letters = old_dfa["letters"]
    transition_function = []
    for block in p:
        # Any member can stand in for the whole block; use the first one.
        representative = block[0]
        for letter in letters:
            next_state = get_next_states(representative, letter, transition)
            if next_state is None:
                continue
            target_block = p[get_partition_index(p, next_state)]
            transition_function.append([block, letter, target_block])

    return {
        "states": p,
        "letters": letters,
        "transition_function": transition_function,
        "start_states": start_states,
        "final_states": final_states
    }