-
Notifications
You must be signed in to change notification settings - Fork 0
/
mcts.py
181 lines (152 loc) · 8.86 KB
/
mcts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import numpy as np
import evaluation
import game_functions as gf
class MCTS:
def __init__(self, board, mode='ucb'):
self.C_CONSTANT = 2 # You may change this parameter to scale the exploration term in the UCB formula.
self.SD_SCALE_PARAM = 8 # You may change this parameter to scale the depth to which the agent searches.
self.TM_SCALE_PARAM = 8 # You may change this parameter to scale the depth to which the agent searches.
self.SCALER_PARAM = 200 # You may change this parameter to scale the depth to which the agent searches.
self.UCB_SD_SCALE_PARAM = 10 # You may change this parameter to scale the depth to which the agent searches.
self.UCB_TM_SCALE_PARAM = 20 # You may change this parameter to scale the depth to which the agent searches.
self.UCB_SCALER_PARAM = 300 # You may change this parameter to scale the depth to which the agent searches.
self.board = board
self.mode = mode
def get_search_params(self, move_number: int) -> (int, int):
"""
Returns the depth to which the agent should search for the given move number.
...
:type move_number: int
:param move_number: The current move number.
:return: The depth to which the agent should search for the given move number.
"""
# TODO: Complete get_search_params function to return the depth to which the agent should search for the given move number.
# Hint: You may want to use the self.SD_SCALE_PARAM, self.SL_SCALE_PARAM, and self.SCALER_PARAM parameters.
# Hint: You may want to use the self.UCB_SPM_SCALE_PARAM, self.UCB_SL_SCALE_PARAM, and self.UCB_SCALER_PARAM parameters.
# Hint: You may want to use the self.mode parameter to check which mode the agent is on.
if self.mode == "mcts":
return (int(self.SD_SCALE_PARAM + move_number/self.SCALER_PARAM), int(self.TM_SCALE_PARAM + move_number/self.SCALER_PARAM))
else:
return (int(self.UCB_SD_SCALE_PARAM + move_number/self.UCB_SCALER_PARAM), int(self.UCB_TM_SCALE_PARAM + move_number/self.UCB_SCALER_PARAM))
# return (100, 30)
def ai_move(self, board, move_number):
search_depth, total_moves = self.get_search_params(move_number)
if self.mode == 'ucb':
best_move = self.mcts_v2(board, total_moves * 4, search_depth)
else:
best_move = self.mcts_v0(board, total_moves, search_depth)
return best_move
@staticmethod
def simulate_move(board: np.ndarray, search_depth: int) -> float:
"""
Returns the score of the given board state.
:param board: The board state for which the score is to be calculated.
:param search_depth: The depth to which the agent should search for the given board state.
:return: The score of the given board state.
"""
# TODO: Complete simulate_move function to simulate a move and return the score of the given board state.
# Hint: You may want to use the gf.random_move function to simulate a random move.
# Hint: You may want to use the evaluation.evaluate_state function to score a board.
# Hint: You may want to use the move_made returned from the gf.random_move function to check if a move was made.
# Hint: You may want to use the gf.add_new_tile function to add a new tile to the board.
# if search_depth == 0:
# return 0
# new_board, move_made, _ = gf.random_move(board)
# if not move_made:
# return evaluation.evaluate_state_mct(new_board)
# new_board = gf.add_new_tile(new_board)
# return MCTS.simulate_move(new_board, search_depth - 1)
# other approach
if search_depth == 0:
return np.sum(board)
new_board, move_made, _ = gf.random_move(board)
if not move_made:
return 0
new_board = gf.add_new_tile(new_board)
return MCTS.simulate_move(new_board, search_depth - 1)
def ucb(self, moves: list, total_visits: int) -> np.ndarray:
"""
Returns the UCB scores for the given moves.
:param moves: The moves for which the UCB scores are to be calculated.
:param total_visits: The total number of visits for all moves.
:return: The UCB scores for the given moves.
"""
# TODO: Complete ucb function to return the UCB scores for the given moves.
# Hint: You may want to use the self.C_CONSTANT parameter to scale the exploration term in the UCB formula.
# Hint: You may want to use np.inf to represent infinity.
# Hint: You may want to use np.sqrt to calculate the square root of a number.
# Hint: You may want to use np.log to calculate the natural logarithm of a number.
result = []
for move in moves:
rollouts = move[2]
score = move[1]
if rollouts == 0:
result.append(np.inf)
else:
result.append(score/rollouts + self.C_CONSTANT*np.sqrt(np.log(total_visits)/rollouts))
def mcts_v0(self, board: np.ndarray, total_moves: int, search_depth: int):
"""
Returns the best move for the given board state.
...
:type search_depth: int
:type total_moves: int
:type board: np.ndarray
:param board: The board state for which the best move is to be found.
:param total_moves: The total number of moves to be simulated.
:param search_depth: The depth to which the agent should search for the given board state.
:return: Returns the best move for the given board state.
"""
# TODO: Complete mcts_v0 function to return the best move for the given board state.
# Hint: You may want to use the gf.get_moves function to get all possible moves.
# Hint: You may want to use the gf.add_new_tile function to add a new tile to the board.
# Hint: You may want to use the self.simulate_move function to simulate a move.
# Hint: You may want to use the np.argmax function to get the index of the maximum value in an array.
# Hint: You may want to use the np.zeros function to create an array of zeros.
# Hint: You may want to use the np.copy function to create a copy of a numpy array.
best_move = gf.get_moves()[0] # default move in case that no move can be made
best_score = -np.inf
for new_board, direction in gf.get_all_possible_moves(board):
gf.add_new_tile(new_board)
total_score = 0
for i in range(total_moves):
total_score += MCTS.simulate_move(new_board, search_depth)
if total_score > best_score:
best_score = total_score
best_move = gf.get_moves()[direction]
return best_move
def mcts_v2(self, board, total_moves, search_depth):
"""
Returns the best move for the given board state.
...
:type search_depth: int
:type total_moves: int
:type board: np.ndarray
:param board: The board state for which the best move is to be found.
:param total_moves: The total number of moves to be simulated.
:param search_depth: The depth to which the agent should search for the given board state.
:return: Returns the best move for the given board state.
"""
# TODO: Complete mcts_v2 function to return the best move for the given board state.
# Hint: You may want to use the gf.get_moves function to get all possible moves.
# Hint: You may want to use the gf.add_new_tile function to add a new tile to the board.
# Hint: You may want to use the self.simulate_move function to simulate a move.
# Hint: You may want to use the np.argmax function to get the index of the maximum value in an array.
# Hint: You may want to use the np.copy function to create a copy of a numpy array.
# Hint: You may want to use the self.ucb function to get the UCB scores for the given moves.
possible_moves = gf.get_all_possible_moves(board) # [(new_board, direction), ...]
actions = [[move[1], 0, 0] for move in possible_moves] # [(direction, score, rollouts), ...]
if len(possible_moves) != 0:
for i in range(total_moves):
ucb_scores = self.ucb(actions, i)
index = np.argmax(ucb_scores)
score = MCTS.simulate_move(possible_moves[index][0], search_depth)
actions[index][1] += score
actions[index][2] += 1
rollouts = -np.inf
result = gf.get_moves()[0] # initialize action with a default move
# find the action which has more rollouts
for action in actions:
if action[2] > rollouts:
rollouts = action[2]
result = gf.get_moves()[action[0]]
return result