import logging
import numpy as np
import time
from go_board import *
from go_tree import *
from model import *
from hyper_params import *
from util import create_record
import go_board

# A single training example: the game state, the MCTS visit distribution over
# actions, and the game outcome from the perspective of the player to move.
# (np.float was removed in NumPy 1.24; np.float64 is the portable spelling.)
GameMove = np.dtype([('state', go_board.GameState),
                     ('action_distribution', np.float64, ACTION_SPACE_SIZE),
                     ('value', np.float64)])


def self_play(game_index: int, model: Model, history_queue=None, noise_alpha=0.0,
              temperatures=[(0, np.inf, 0.0)]):
    """Plays one self-play game (the same model on both sides) and, if
    `history_queue` is given, appends each labelled GameMove to it."""
    start_player = np.random.choice(2)
    starting_state = go_board.create_zero_state(start_player)
    start_time = time.time()
    logging.info("Game: %d: start player: %d", game_index, starting_state.player)
    player = GoPlayer(starting_state, model=model, noise_alpha=noise_alpha, temperatures=temperatures)
    outcome, game_history = play_game(player_1=player, player_2=None)
    if history_queue is not None:
        for game_move in game_history:
            history_queue.append(game_move)
    end_time = time.time()
    logging.info("Game: %d, outcome: %d, num_moves: %d, game_time(s): %.0f, end position:\n%s\n",
                 game_index, outcome, len(game_history), (end_time - start_time),
                 go_board.to_pretty_print(game_history[-1].state.pos[-1]))
    return start_player, outcome
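
# A non-default call might look like the sketch below. This is an assumption
# inferred from the default value: each `temperatures` entry appears to read
# as (first_move, last_move, temperature), i.e. sample moves in proportion to
# MCTS visit counts for the first 30 moves, then play greedily.
#
#   self_play(game_index=0, model=model, history_queue=history,
#             noise_alpha=0.3,
#             temperatures=[(0, 30, 1.0), (30, np.inf, 0.0)])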


def two_player_play(game_index: int, model_a: Model, model_b: Model):
    """Plays one evaluation game between two models, both deterministic
    (no root noise, temperature 0). Returns (start_player, outcome)."""
    start_player = np.random.choice(2)
    start_time = time.time()
    logging.info("Eval start game %d: start player: %d", game_index, start_player)
    starting_state = go_board.create_zero_state(start_player)
    player_1 = GoPlayer(starting_state, model=model_a, noise_alpha=0.0, temperatures=[(0, np.inf, 0.0)])
    player_2 = GoPlayer(starting_state, model=model_b, noise_alpha=0.0, temperatures=[(0, np.inf, 0.0)])
    outcome, moves = play_game(player_1=player_1, player_2=player_2)
    end_time = time.time()
    logging.info("Eval end game %d, num_moves: %d: outcome: %d, game_time(s): %.0f, end position:\n%s",
                 game_index, len(moves), outcome, (end_time - start_time),
                 go_board.to_pretty_print(moves[-1].state.pos[-1]))
    return start_player, outcome
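
# Hypothetical evaluation driver (not part of this module): play a batch of
# games and collect results. How `outcome` maps to a winning model depends on
# go_board's outcome conventions, so only the raw pairs are collected here.
#
#   results = [two_player_play(i, model_a=best, model_b=candidate)
#              for i in range(num_eval_games)]
#   # inspect the (start_player, outcome) pairs to decide if `candidate` wins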


def create_game_move_record(state, action_distribution, value):
    return create_record(GameMove, value=(state, action_distribution, value))
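
# Sketch of how a GameMove record behaves (an assumption: `create_record` in
# util.py is presumed to return a numpy record with attribute field access,
# which is what play_game relies on when back-filling `value` below):
#
#   move = create_game_move_record(state, pi, None)
#   move.state                # GameState snapshot before the move
#   move.action_distribution  # MCTS visit distribution, ACTION_SPACE_SIZE long
#   move.value = 1.0          # filled in once the game outcome is known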


def play_game(player_1=None, player_2=None):
    """
    Plays a game from player_1's root state to the end. For each position,
    the player to move runs MCTS for `mcts_steps` steps before selecting an
    action.

    Args:
        player_1: GoPlayer that holds the starting state and makes the first move.
        player_2: GoPlayer for the opponent, or None for self-play, in which
            case player_1 plays both sides.

    Returns:
        A tuple (outcome, game_history), where game_history is a list of
        GameMove records, each holding the state s, the MCTS action
        distribution pi, and the outcome z from that state's player's
        perspective.
    """
    is_self_play = player_2 is None
    players = {PLAYER_1: player_1, PLAYER_2: player_2 if not is_self_play else player_1}
    # play until a terminal outcome is reached
    game_history = []
    current_state = player_1.root.state
    outcome = None
    move_index = 0
    while outcome is None:
        move_index += 1
        current_player = int(current_state.player)
        other_player = 1 - current_player
        act_distrib, action_taken, outcome = players[current_player].play(move_index)
        if not is_self_play:
            players[other_player].opponent_played(move_index, action_taken)
        game_history.append(create_game_move_record(current_state, act_distrib, None))
        current_state = players[current_player].root.state
        logging.debug("Move %d, Player %d: action (%d, %d) -> outcome: %s\n%s",
                      move_index, 1 - current_state.player, *go_board.get_action_coords(action_taken),
                      outcome, go_board.to_pretty_print(current_state.pos[-1]))
    # back-fill each move's value with the final outcome, seen from the
    # perspective of the player to move in that state
    for game_move in game_history:
        game_move.value = GoPlayer.get_reward(outcome, game_move.state.player)
    return outcome, game_history
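

if __name__ == "__main__":
    # Minimal smoke-test entry point (added sketch, not original code).
    # `Model()` with no arguments is an assumption; substitute the real
    # constructor or checkpoint-loading call from model.py.
    from collections import deque

    logging.basicConfig(level=logging.INFO)
    history = deque(maxlen=100_000)
    model = Model()  # hypothetical zero-arg construction
    self_play(0, model, history_queue=history, noise_alpha=0.3)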