# environment.py
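# Wraps gym / maze environments behind a common interface and defines the
# per-environment constants used by the rest of the code base.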
import re

import gym
import torch

import maze
class EnvironmentWrapper:
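    """Thin wrapper around gym / maze environments: builds the underlying env from
    the config, normalizes feature observations, stacks the most recent frames, and
    reshapes rewards for MountainCar and Acrobot."""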
    def __init__(self, config):
        self.config = config
        if config.env == 'mountaincar':
            self.env = gym.make('MountainCar-v0')
        elif config.env == 'acrobot':
            self.env = gym.make('Acrobot-v1')
        elif 'maze' in config.env:
            self.env = maze.MazeEnv(size=config.maze_size, time=100, holes=0, num_goal=1)
        self.state_buffer = []
        self.reward_buffer = []
        self.counter = 0
        self.smax = torch.tensor(self.env.observation_space.high)
        self.smin = torch.tensor(self.env.observation_space.low)

    def normalize(self, s):
        return (s - self.smin) / (self.smax - self.smin)

    def unnormalize(self, s):
        return s * (self.smax - self.smin) + self.smin
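    # Convert a raw observation to a float tensor: min-max normalize feature
    # vectors (MountainCar / Acrobot) or move maze images to CHW layout.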
    def process_state(self, s, subsample=4):
        if self.config.env == 'mountaincar' or ('acrobot' in self.config.env):
            s = torch.from_numpy(s).float().squeeze()
            s = (s - self.smin) / (self.smax - self.smin)
        elif 'maze' in self.config.env:
            s = torch.from_numpy(s).permute(2, 0, 1).clone().float()
        return s
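    # Reset the underlying environment and refill the frame buffer with
    # n_input_frames processed observations.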
    def reset(self):
        self.counter = 0
        self.state_buffer = []
        for _ in range(self.config.n_input_frames):
            state = self.process_state(self.env.reset(), self.config.image_subsample)
            self.state_buffer.append(state)
        return torch.stack(self.state_buffer)
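    # Apply an action n_action_repeat times, accumulate the (reshaped) reward,
    # and return the stacked last n_input_frames states.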
    def step(self, action):
        total_reward = 0
        for _ in range(self.config.n_action_repeat):
            state, reward, done, info = self.env.step(action)[:4]
            if self.config.env == 'mountaincar':
                # MountainCar-v0 always returns reward -1, so override it with 0 once the goal position is reached
                position, velocity = state
                reward = 0.0 if (position >= self.env.env.goal_position) else -1.0
            elif 'acrobot' in self.config.env:
                reward = 0.0 if self.env.env._terminal() else -1.0
            total_reward += reward
            if done:
                break
        state = self.process_state(state, self.config.image_subsample)
        self.state_buffer.append(state)
        self.state_buffer = self.state_buffer[-self.config.n_input_frames:]
        return torch.stack(self.state_buffer), total_reward, done, info
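# Per-environment hyperparameters (input shape, number of actions, reward range,
# feature/embedding settings) attached to the config object.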
def add_constants(config):
    # Add constants specific to the chosen environment
    if 'maze' in config.env:
        config.maze_size = int(re.findall(r'\d+', config.env)[0])
        config.n_input_channels = 3
        config.n_input_frames = 1
        config.n_actions = 4
        config.height = config.maze_size
        config.width = config.maze_size
        config.image_subsample = 1
        config.phi_layer_size = 25 * config.n_feature_maps
        config.n_action_repeat = 1
        config.edim = config.phi_layer_size
        config.rmin = -0.5
        config.rmax = 2
        config.max_exploration_steps = 1000
        config.input_type = 'image'
        config.phi = 'learned'
    elif config.env == 'mountaincar':
        config.n_input_channels = 2
        config.n_input_frames = 1
        config.n_actions = 3
        config.height = 1
        config.width = 1
        config.edim = 2
        config.image_subsample = 1
        config.phi_layer_size = 2
        config.n_action_repeat = 1
        config.phi = 'none'
        config.spherenorm = 0
        config.learn_radius = 0
        config.rmin = -1.0
        config.rmax = 0.0
        config.input_type = 'features'
    elif 'acrobot' in config.env:
        config.n_input_channels = 6
        config.n_input_frames = 1
        config.n_actions = 3
        config.height = 1
        config.width = 1
        config.edim = 6
        config.image_subsample = 1
        config.phi_layer_size = 6
        config.n_action_repeat = 1
        config.phi = 'none'
        config.spherenorm = 0
        config.learn_radius = 0
        config.rmin = -1.0
        config.rmax = 0.0
        config.input_type = 'features'
    else:
        raise ValueError(f'unknown environment: {config.env}')
    return config
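
# A minimal usage sketch (illustrative, not part of the original module). It assumes
# a config object exposing the attributes read above, here an argparse.Namespace,
# and the classic gym API in which env.reset() returns only the observation.
if __name__ == '__main__':
    from argparse import Namespace

    config = Namespace(env='mountaincar', n_feature_maps=32)  # n_feature_maps is only read for maze configs
    config = add_constants(config)
    wrapper = EnvironmentWrapper(config)

    states = wrapper.reset()  # for MountainCar: a (n_input_frames, 2) stack of normalized states
    for t in range(200):
        action = wrapper.env.action_space.sample()  # random policy, just to exercise the wrapper
        states, total_reward, done, info = wrapper.step(action)
        if done:
            break
    print('rollout ended after', t + 1, 'steps; last reward:', total_reward)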