-
Notifications
You must be signed in to change notification settings - Fork 0
/
evaluate.py
63 lines (60 loc) · 2.22 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def evaluate_mask(model, env, totalGames):
    """Play games with an action-masked model and summarise the results.

    Each step asks ``env.action_masks()`` for the current masks and passes
    them to ``model.predict`` (non-deterministic), then steps and renders
    the environment. A game is finished when ``env.step`` reports
    ``terminated`` or ``truncated``; the environment is reset afterwards.

    Args:
        model: Object exposing ``predict(observation, action_masks=...,
            deterministic=...)`` that returns ``(action, state)``.
        env: Environment exposing ``reset``, ``step``, ``render``,
            ``action_masks``, ``score`` and ``WINNING_SCORE``.
        totalGames: Number of finished games to play.

    Returns:
        ``[average score, win percentage]`` over the finished games, where a
        win is a game whose ``env.score`` equals ``env.WINNING_SCORE``.
        ``[0, 0]`` when no game was played (``totalGames <= 0``).
    """
    scores = 0
    numOfGames = 0
    numOfWins = 0
    observation, info = env.reset()
    while numOfGames < totalGames:
        actionMasks = env.action_masks()
        action, _ = model.predict(
            observation, action_masks=actionMasks, deterministic=False
        )
        observation, _, terminated, truncated, info = env.step(action)
        env.render()
        # A truncated episode must be reset as well; stepping an environment
        # past the end of an episode is not a supported operation.
        if terminated or truncated:
            scores += env.score
            numOfGames += 1
            if env.score == env.WINNING_SCORE:
                numOfWins += 1
            observation, info = env.reset()
            # NOTE(review): indentation was lost in the reviewed copy; printing
            # the post-reset info once per finished game is an assumption.
            print(info)
    if numOfGames == 0:
        # Nothing was played, so avoid dividing by zero below.
        return [0, 0]
    return [scores / numOfGames, 100 * numOfWins / numOfGames]
def evaluate(model, env, totalGames, maxTimesteps=10000):
    """Play games with an unmasked model and summarise the results.

    Each step calls ``model.predict`` (non-deterministic), then steps and
    renders the environment. A game is finished when ``env.step`` reports
    ``terminated`` or ``truncated``; the environment is reset afterwards.
    The loop stops after ``totalGames`` finished games or ``maxTimesteps``
    environment steps, whichever comes first.

    Args:
        model: Object exposing ``predict(observation, deterministic=...)``
            that returns ``(action, state)``.
        env: Environment exposing ``reset``, ``step``, ``render``, ``score``
            and ``WINNING_SCORE``.
        totalGames: Number of finished games to aim for.
        maxTimesteps: Upper bound on the total number of ``env.step`` calls.

    Returns:
        ``[average score, win percentage]`` over the finished games, where a
        win is a game whose ``env.score`` equals ``env.WINNING_SCORE``.
        ``[0, 0]`` when no game finished within the step budget.
    """
    scores = 0
    numOfGames = 0
    numOfWins = 0
    timesteps = 0
    observation, info = env.reset()
    while numOfGames < totalGames and timesteps < maxTimesteps:
        action, _ = model.predict(observation, deterministic=False)
        observation, _, terminated, truncated, info = env.step(action)
        timesteps += 1
        env.render()
        # A truncated episode must be reset as well; stepping an environment
        # past the end of an episode is not a supported operation.
        if terminated or truncated:
            scores += env.score
            numOfGames += 1
            if env.score == env.WINNING_SCORE:
                numOfWins += 1
            observation, info = env.reset()
            # NOTE(review): indentation was lost in the reviewed copy; printing
            # the post-reset info once per finished game is an assumption.
            print(info)
    if numOfGames == 0:
        # Was unable to finish a single game (picked too many uncovered tiles).
        return [0, 0]
    return [scores / numOfGames, 100 * numOfWins / numOfGames]
def evaluate_random(model, env, totalGames, maxTimesteps=10000):
    """Play games with uniformly sampled actions and summarise the results.

    Each step draws an action from ``env.action_space.sample()``, then steps
    and renders the environment. A game is finished when ``env.step``
    reports ``terminated`` or ``truncated``; the environment is reset
    afterwards. The loop stops after ``totalGames`` finished games or
    ``maxTimesteps`` environment steps, whichever comes first.

    Args:
        model: Unused; kept so the signature matches the other evaluators.
        env: Environment exposing ``reset``, ``step``, ``render``,
            ``action_space``, ``score`` and ``WINNING_SCORE``.
        totalGames: Number of finished games to aim for.
        maxTimesteps: Upper bound on the total number of ``env.step`` calls.

    Returns:
        ``[average score, win percentage]`` over the finished games, where a
        win is a game whose ``env.score`` equals ``env.WINNING_SCORE``.
        ``[0, 0]`` when no game finished within the step budget.
    """
    scores = 0
    numOfGames = 0
    numOfWins = 0
    timesteps = 0
    observation, info = env.reset()
    while numOfGames < totalGames and timesteps < maxTimesteps:
        action = env.action_space.sample()
        observation, _, terminated, truncated, info = env.step(action)
        timesteps += 1
        env.render()
        # A truncated episode must be reset as well; stepping an environment
        # past the end of an episode is not a supported operation.
        if terminated or truncated:
            scores += env.score
            numOfGames += 1
            if env.score == env.WINNING_SCORE:
                numOfWins += 1
            observation, info = env.reset()
            # NOTE(review): indentation was lost in the reviewed copy; printing
            # the post-reset info once per finished game is an assumption.
            print(info)
    if numOfGames == 0:
        # Was unable to finish a single game (picked too many uncovered tiles).
        return [0, 0]
    return [scores / numOfGames, 100 * numOfWins / numOfGames]