import random
from itertools import product

import numpy as np


class TicTacToe:

    def __init__(self):
        """Initialise the board."""
        # initialise the state as a list of 9 empty (NaN) cells
        self.state = [np.nan for _ in range(9)]
        # all numbers that can be placed on the board: 1 to 9
        self.all_possible_numbers = [i for i in range(1, len(self.state) + 1)]
        self.reset()

    def is_winning(self, curr_state):
        """Takes state as an input and returns whether any row, column or
        diagonal has a winning sum.
        Example: Input state- [1, 2, 3, 4, nan, nan, nan, nan, nan]
        Output = False"""
        # index triplets for each direction on the 3 x 3 board
        # 3 horizontal rows
        horizontal_indices = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
        # 3 vertical columns
        vertical_indices = [[0, 3, 6], [1, 4, 7], [2, 5, 8]]
        # 2 diagonals: (top left --> bottom right) & (top right --> bottom left)
        diagonal_indices = [[0, 4, 8], [2, 4, 6]]
        # the sum across a group of indices must equal 15 to win the game
        horizontal_sums = [np.sum(np.array(curr_state)[i]) for i in horizontal_indices]
        vertical_sums = [np.sum(np.array(curr_state)[i]) for i in vertical_indices]
        diagonal_sums = [np.sum(np.array(curr_state)[i]) for i in diagonal_indices]
        # the game is won if the sum along any line equals 15
        # (sums involving NaN evaluate to NaN and never equal 15)
        return any(s == 15 for s in horizontal_sums + vertical_sums + diagonal_sums)

    def is_terminal(self, curr_state):
        # a terminal state is either a winning state or a fully filled board
        if self.is_winning(curr_state):
            return True, "Win"
        elif len(self.allowed_positions(curr_state)) == 0:
            return True, "Tie"
        else:
            return False, "Resume"

    def allowed_positions(self, curr_state):
        """Takes state as an input and returns all indices that are blank."""
        return [i for i, val in enumerate(curr_state) if np.isnan(val)]

    def allowed_values(self, curr_state):
        """Takes the current state as input and returns all possible (unused)
        values that can be placed on the board."""
        # values already placed on the board
        used_values = [val for val in curr_state if not np.isnan(val)]
        # the RL agent may only play odd numbers: {1, 3, 5, 7, 9};
        # fetch the odd numbers the agent has not played so far
        agent_values = [
            val for val in self.all_possible_numbers
            if val not in used_values and val % 2 != 0
        ]
        # the environment may only play even numbers: {2, 4, 6, 8};
        # fetch the even numbers the environment has not played so far
        env_values = [
            val for val in self.all_possible_numbers
            if val not in used_values and val % 2 == 0
        ]
        return (agent_values, env_values)

    def action_space(self, curr_state):
        """Takes the current state as input and returns all possible actions,
        i.e. all combinations of allowed positions and allowed values."""
        allowed_positions = self.allowed_positions(curr_state)
        allowed_values = self.allowed_values(curr_state)
        # the action space of a given state is the Cartesian product of
        # allowed positions and allowed values
        agent_actions = product(allowed_positions, allowed_values[0])
        env_actions = product(allowed_positions, allowed_values[1])
        return (agent_actions, env_actions)
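
    # Worked example (illustrative values, not from the original file):
    # for state [1, 2, 3, 4, nan, nan, nan, nan, nan],
    # allowed_positions -> [4, 5, 6, 7, 8] and
    # allowed_values -> ([5, 7, 9], [6, 8]), so agent_actions yields
    # (position, value) pairs such as (4, 5), (4, 7), ..., (8, 9).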

    def state_transition(self, curr_state, curr_action):
        """Takes the current state and an action and returns the board
        position just after that move (used for both agent and environment moves).
        Example: Input state- [1, 2, 3, 4, nan, nan, nan, nan, nan], action- [7, 9] or [position, value]
        Output = [1, 2, 3, 4, nan, nan, nan, 9, nan]
        """
        # copy the existing state so the input is not mutated
        new_state = list(curr_state)
        # apply the action: place the value at the chosen position
        new_state[curr_action[0]] = curr_action[1]
        return new_state

    def step(self, curr_state, curr_action):
        """Takes the current state and an action and returns the next state,
        the reward and whether the state is terminal. Hint: first check the
        board position after the agent's move, i.e. whether the game is
        won/lost/tied; then incorporate the environment's move and check the
        board status again.
        Example: Input state- [1, 2, 3, 4, nan, nan, nan, nan, nan], action- [7, 9] or [position, value]
        Output = ([1, 2, 3, 4, nan, nan, nan, 9, nan], -1, False, "Resume")"""
        # generate the new state after the agent's move
        new_state = self.state_transition(curr_state, curr_action)
        # check whether a terminal state has been reached,
        # i.e. either the agent has won or it's a tie
        has_reached_terminal_state, message = self.is_terminal(new_state)
        if has_reached_terminal_state:
            # set the reward and message when the agent's move ends the game
            if message == "Win":
                reward = 10
                game_message = "Agent Won!"
            else:
                reward = 0
                game_message = "It's a tie!"
            return (new_state, reward, has_reached_terminal_state, game_message)
        else:
            # the game is not in a terminal state:
            # generate a random environment action
            _, env_actions = self.action_space(new_state)
            env_action = random.choice(list(env_actions))
            # move to the new state resulting from the environment's action
            new_state_post_env_action = self.state_transition(new_state, env_action)
            # check whether the environment's action results in a terminal state
            has_reached_terminal_state, message = self.is_terminal(new_state_post_env_action)
            # decide whether the environment has won, it's a tie,
            # or the game can continue
            if has_reached_terminal_state:
                if message == "Win":
                    reward = -10
                    game_message = "Environment Won!"
                else:
                    reward = 0
                    game_message = "It's a tie!"
            else:
                reward = -1
                game_message = "Resume"
            return (
                new_state_post_env_action,
                reward,
                has_reached_terminal_state,
                game_message,
            )

    def reset(self):
        """Reset the board to an empty state and return it."""
        self.state = [np.nan for _ in range(9)]
        return self.state
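

if __name__ == "__main__":
    # Minimal usage sketch (an illustrative addition, not part of the
    # environment itself): play one episode with a uniformly random agent
    # to show the reset() / action_space() / step() loop. A real RL agent
    # would pick actions from a learned policy instead of random.choice.
    env = TicTacToe()

    # sanity check (hypothetical board): the magic square
    # [4, 9, 2, 3, 5, 7, 8, 1, 6] has every line summing to 15, so it wins
    assert env.is_winning([4, 9, 2, 3, 5, 7, 8, 1, 6])

    curr_state = env.reset()
    done = False
    while not done:
        agent_actions, _ = env.action_space(curr_state)
        action = random.choice(list(agent_actions))
        curr_state, reward, done, message = env.step(curr_state, action)
    print("Final state:", curr_state)
    print("Reward:", reward, "| Outcome:", message)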