bot2.py
from bot1 import *  # provides BOARD_SIZE, get_edges, check_surrounding_squares, always4never3
from dots_and_boxes import init_board
from itertools import combinations
import numpy as np
import json

# One binary weight per edge, so every set of drawn edges maps to a unique integer.
WEIGHTS = np.array([2**i for i in range(2*BOARD_SIZE*(BOARD_SIZE-1))])
gamma = 1  # discount factor
RW = 10    # end-score reward constant
def board2num(board):
    # Flatten the board and encode the drawn edges as an integer via the binary weights.
    b = list(sum(board, []))
    num = sum(WEIGHTS[i] for i in range(len(WEIGHTS)) if b[i])
    return num
def edge2num(edge):
    # Map an edge coordinate (i, j) to the binary weight of its position in the
    # flattened board; rows alternate between the two edge orientations.
    i, j = edge
    num = (i // 2)*(2*BOARD_SIZE - 1) + j
    if i % 2 == 1:
        num += BOARD_SIZE - 1
    return WEIGHTS[num]
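# Encoding sanity check (a sketch, assuming init_board() returns the empty
# all-False board, as train_last_k below relies on):
#
#   board = init_board()
#   board2num(board)    # -> 0: no edges drawn
#   board[0][0] = True
#   board2num(board)    # -> 1, which equals edge2num((0, 0))
#
# With BOARD_SIZE = 3 there are 2*3*2 = 12 edges, so the full board encodes
# to 2**12 - 1 = 4095.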
def train_last_k(k):
    # Build lookup tables for every board with fewer than k missing edges by
    # working backwards from the full board.  Q[m][score][board_num] is the
    # value of a board with m missing edges, and Qedge[m][board_num] is the
    # best edge to draw on that board.
    Q, Qedge = [], []  # stay empty if k == 0 (avoids a NameError on the return)
    board = init_board()
    edges = []
    for i, row in enumerate(board):
        for j, val in enumerate(row):
            board[i][j] = True
            edges.append((i, j))
    for m in range(k):
        board_states = combinations(edges, m)
        if m == 0:  # end states: the board is full, only the score matters
            board_num = str(board2num(board))
            max_score = (BOARD_SIZE - 1)**2
            Qedge = [dict()]
            Q = [[dict() for i in range(max_score + 1)]]
            for score in range(max_score + 1):
                Q[m][score][board_num] = RW*(score - max_score/2)
        else:
            # Initiate a new row in the Q-value lookup table.
            Q.append([dict() for i in range(max_score - m // 4)])
            Qedge.append(dict())
            for board_state in board_states:
                # Construct the board for this state by removing its m edges.
                for edge in board_state:
                    i, j = edge
                    board[i][j] = False
                # Count how many squares are already filled.
                total_score = 0
                for i in range(BOARD_SIZE - 1):
                    for j in range(BOARD_SIZE - 1):
                        sq_edges = get_edges((i, j), board)
                        if sum(sq_edges) == 4:
                            total_score += 1
                # Find the best move.  It is the same regardless of how the
                # score is split, so evaluate it at score == total_score.
                potential_moves = []
                for i, row in enumerate(board):
                    for j, val in enumerate(row):
                        if not val:
                            gain = check_surrounding_squares(board, (i, j), 3)
                            # Each move records the inserted edge and the score gain.
                            potential_moves.append(((i, j), gain))
                # Map potential_moves onto the Q-values of the successor boards.
                qmax = -np.inf
                board_num = board2num(board)
                for move in potential_moves:
                    edge, gain = move
                    edge_num = edge2num(edge)
                    if gain:
                        # Completing a square: the same player moves again.
                        qval = gamma*Q[m-1][total_score + gain][str(board_num + edge_num)]
                    else:
                        # No square completed: the turn passes, so the successor
                        # value (taken at the opponent's score 0) is negated.
                        qval = -gamma*Q[m-1][0][str(board_num + edge_num)]
                    if qmax < qval:
                        qmax = qval
                        best_edge = edge
                # Insert Q-values for every possible score split.
                for score_state in range(total_score + 1):
                    Q[m][score_state][str(board_num)] = qmax - RW*(total_score - score_state)
                # Insert the best move.
                Qedge[m][str(board_num)] = best_edge
                # Restore the full board before the next state.
                for edge in board_state:
                    i, j = edge
                    board[i][j] = True
    return Q, Qedge
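# Note: level m enumerates all C(E, m) boards with m missing edges, where
# E = 2*BOARD_SIZE*(BOARD_SIZE-1) is the total number of edges (12 when
# BOARD_SIZE = 3), so training time and table size grow combinatorially in k.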
def bot2_train_and_save(k):
    # Train the tables and dump them to JSON files keyed by k and the board size.
    Q, Qedge = train_last_k(k)
    with open('Q_' + str(k) + '_' + str(BOARD_SIZE) + '.txt', 'w') as f:
        json.dump(Q, f)
    with open('Qedge_' + str(k) + '_' + str(BOARD_SIZE) + '.txt', 'w') as f:
        json.dump(Qedge, f)
def bot2_load(k):
    # Load the best-move table written by bot2_train_and_save.
    with open('Qedge_' + str(k) + '_' + str(BOARD_SIZE) + '.txt', 'r') as f:
        Qedge = json.load(f)
    return Qedge
def bot2_move(board, Qedge, k):
    # The last k moves are played optimally from the lookup table; before that,
    # fall back to the always4never3 strategy.
    m = 0  # number of missing edges
    for i, row in enumerate(board):
        for j, val in enumerate(row):
            if not val:
                m += 1
    if m < k:
        board_num = board2num(board)
        # Note: after the JSON round-trip the stored edge is a list, not a tuple.
        move = Qedge[m][str(board_num)]
    else:
        move = always4never3(board)
    return move
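if __name__ == '__main__':
    # Minimal usage sketch: train tables for the last k moves, reload them,
    # and query a move on the empty board.  Assumes init_board() returns the
    # empty board; k = 5 is an arbitrary example value.
    k = 5
    bot2_train_and_save(k)
    Qedge = bot2_load(k)
    board = init_board()
    print(bot2_move(board, Qedge, k))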