-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcatch_environment.py
67 lines (51 loc) · 1.89 KB
/
catch_environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from skimage.transform import resize
import numpy as np
class CatchEnv():
def __init__(self):
self.size = 21
self.image = np.zeros((self.size, self.size))
self.state = []
self.fps = 4
self.output_shape = (84, 84)
def reset_random(self):
self.image.fill(0)
self.pos = np.random.randint(2, self.size - 2)
self.vx = np.random.randint(5) - 2
self.vy = 1
self.ballx, self.bally = np.random.randint(self.size), 4
self.image[self.bally, self.ballx] = 1
self.image[-5, self.pos - 2:self.pos + 3] = np.ones(5)
return self.step(2)[0]
def step(self, action):
def left():
if self.pos > 3:
self.pos -= 2
def right():
if self.pos < 17:
self.pos += 2
def noop():
pass
{0: left, 1: right, 2: noop}[action]()
self.image[self.bally, self.ballx] = 0
self.ballx += self.vx
self.bally += self.vy
if self.ballx > self.size - 1:
self.ballx -= 2 * (self.ballx - (self.size - 1))
self.vx *= -1
elif self.ballx < 0:
self.ballx += 2 * (0 - self.ballx)
self.vx *= -1
self.image[self.bally, self.ballx] = 1
self.image[-5].fill(0)
self.image[-5, self.pos - 2:self.pos + 3] = np.ones(5)
terminal = self.bally == self.size - 1 - 4
reward = int(self.pos - 2 <= self.ballx <= self.pos + 2) if terminal else 0
[self.state.append(resize(self.image, (84, 84))) for _ in range(self.fps - len(self.state) + 1)]
self.state = self.state[-self.fps:]
return np.transpose(self.state, [1, 2, 0]), reward, terminal
def get_num_actions(self):
return 3
def reset(self):
return self.reset_random()
def state_shape(self):
return (self.fps,) + self.output_shape