-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplotting.py
91 lines (77 loc) · 3.27 KB
/
plotting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# code copied from: https://github.com/dennybritz/reinforcement-learning/blob/master/lib/plotting.py
import matplotlib
import numpy as np
import pandas as pd
from collections import namedtuple
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def plot_value_function(V, title="Value Function",
                        directory=None, file_name=None,
                        show=False):
    """
    Plot a value function as two 3-D surface plots: one for states
    without a usable ace and one for states with a usable ace.

    code adapted from: https://github.com/dennybritz/reinforcement-learning/blob/master/lib/plotting.py

    Parameters
    ----------
    V : dict
        Maps state tuples ``(player_sum, dealer_showing, usable_ace)``
        to a scalar value.  Every (x, y) pair in the spanned grid must
        be present for both ``usable_ace`` values.
    title : str
        Base title; "(No Usable Ace)" / "(Usable Ace)" is appended.
    directory : str or None
        Path prefix used when saving; figures are saved only when both
        ``directory`` and ``file_name`` are given.
    file_name : str or None
        File-name suffix; the saved name is ``directory + prefix + file_name``
        where prefix is "noace_" or "ace_".
    show : bool
        If True, display each figure via ``fig.show()``.
    """
    # Grid bounds are taken from the states actually present in V.
    min_x = min(k[0] for k in V.keys())
    max_x = max(k[0] for k in V.keys())
    min_y = min(k[1] for k in V.keys())
    max_y = max(k[1] for k in V.keys())
    x_range = np.arange(min_x, max_x + 1)
    y_range = np.arange(min_y, max_y + 1)
    X, Y = np.meshgrid(x_range, y_range)
    # Find value for all (x, y) coordinates
    Z_noace = np.apply_along_axis(lambda s: V[(s[0], s[1], False)], 2,
                                  np.dstack([X, Y]))
    Z_ace = np.apply_along_axis(lambda s: V[(s[0], s[1], True)], 2,
                                np.dstack([X, Y]))

    def plot_surface(X, Y, Z, title, usable_ace):
        # Render one surface; `show` is read from the enclosing scope.
        fig = plt.figure(figsize=(20, 10))
        ax = fig.add_subplot(111, projection='3d')
        surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                               cmap=matplotlib.cm.coolwarm, vmin=-1.0, vmax=1.5)
        ax.set_xlabel('Player Sum', size=22)
        ax.set_ylabel('Dealer Showing', size=22)
        ax.set_zlabel('Value', size=22)
        ax.set_title(title, size=22)
        ax.view_init(ax.elev, -120)
        fig.colorbar(surf)
        # Bug fix: the save call was commented out (so `directory` and
        # `file_name` were silently ignored) and misspelled the keyword
        # as 'bbox_inces'.  Guard on both arguments so that the default
        # directory=None cannot crash the string concatenation.
        if directory and file_name:
            fig.savefig(directory + usable_ace + file_name,
                        bbox_inches='tight')
        if show:
            fig.show()
        plt.close(fig)

    plot_surface(X, Y, Z_noace, "{} (No Usable Ace)".format(title),
                 usable_ace="noace_")
    plot_surface(X, Y, Z_ace, "{} (Usable Ace)".format(title),
                 usable_ace="ace_")
def plot_avg_reward_episode(path, env_types, ndecks):
    """
    Plot the average reward over episodes for every (env_type, ndecks)
    combination found in `path`.

    Parameters
    ----------
    path : str
        Folder holding files named ``{env_type}_state_{ndecks}.txt``,
        each a comma-separated table with 'episode' and 'avg_reward'
        columns.
    env_types : list of str
        Environment types to plot; each must be one of
        "hand_MC", "hand", "sum".
    ndecks : list
        Deck counts to plot.

    Returns
    -------
    (fig, lgd)
        The matplotlib Figure and its Legend (the legend is placed
        below the axes, so callers need it for tight bounding boxes).
    """
    def load_df(path, env_type, deck):
        # Load one run's table and tag it with its configuration so the
        # concatenated frame can be grouped later.
        assert env_type in ["hand_MC", "hand", "sum"]
        path_to_file = "{}/{}_state_{}.txt".format(path, env_type, deck)
        # Bug fix: pd.read_table is deprecated (removed in pandas 2.x
        # under that spelling of use); read_csv with sep="," is the
        # exact equivalent.
        frame = pd.read_csv(path_to_file, sep=",")
        frame['env_type'] = env_type
        frame['ndecks'] = deck
        return frame

    frames = [load_df(path, env, deck)
              for env in env_types
              for deck in ndecks]
    data = pd.concat(frames)

    fig, ax = plt.subplots(figsize=(8, 6))
    lab = []
    # Bug fix: the original wrote `for label, df in df.groupby(...)`,
    # rebinding the concatenated frame inside its own loop; distinct
    # names keep the two objects separate.
    for label, group in data.groupby(["env_type", "ndecks"]):
        lab.append(label)
        ax.plot(group['episode'], group['avg_reward'], label=label)
    lgd = ax.legend(title="(State space, ndecks)", loc='upper center',
                    bbox_to_anchor=(0.5, -0.1),
                    shadow=False, ncol=2, framealpha=0.0, fontsize=22)
    ax.set_xlabel("episode", size=22)
    ax.set_ylabel("avg. reward", size=22)
    return fig, lgd