-
Notifications
You must be signed in to change notification settings - Fork 1
/
domain.py
111 lines (85 loc) · 3.46 KB
/
domain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import numpy as np
import random
# Cell codes for the flat state vector.
EMPTY = 0
WALL = 1
GOAL = 2
AGENT = 3


class ToyWorld:
    """
    10x10 grid world specified in a config file (default: world.txt).

    Config key: {'|' = wall, 'a' = agent (at initial location),
                 'g' = allowable goal placement, 'x' = empty}

    The world is a flat length-100 numpy vector of cell codes; cell
    (row, col) lives at flat index row * 10 + col.
    """

    def __init__(self, goal_index, world_config='world.txt', max_steps=70):
        """Build the world and place the goal at flat index `goal_index`.

        goal_index:   flat index of the active goal cell (must be one of the
                      'g' cells in the config for the layout to make sense).
        world_config: path to the grid layout file.
        max_steps:    episode step budget before the state becomes terminal.
        """
        self.agent_index = None   # flat index of the agent; set by s0()
        self.num_steps = None     # steps taken this episode; set by s0()
        self.max_steps = max_steps
        self.world_config = world_config
        self.possible_actions = ['up', 'down', 'right', 'left', 'stay']
        self.goal_index = goal_index
        self.state = self.s0(self.goal_index)

    def set_goal(self, goal_index):
        """Change the active goal cell without resetting the world."""
        self.goal_index = goal_index

    def get_full_state(self):
        """Return the full flat state vector (length 100)."""
        return self.state

    def get_agent_state(self):
        """Return the agent's flat cell index as a plain int.

        [0][0] extracts the scalar explicitly: int() on a size-1 ndarray
        is deprecated in NumPy >= 1.25.
        """
        return int(np.where(self.state == AGENT)[0][0])

    def reinitialize(self, goal_index):
        """Reset the world to its initial layout with the given goal."""
        self.state = self.s0(goal_index)

    def s0(self, goal_index):
        """Parse world_config into the initial state vector and return it.

        Resets the step counter, records the agent's starting index, clears
        every candidate goal cell except `goal_index`, and marks that cell
        as the goal.
        """
        self.num_steps = 0
        key = {'x': EMPTY, '|': WALL, 'g': GOAL, 'a': AGENT}
        world_vector = []
        with open(self.world_config, 'r') as world_file:
            for line in world_file:
                world_vector += [key[token] for token in line.strip().split(' ')]
        world_vector = np.array(world_vector)
        self.goal_index = goal_index
        self.agent_index = np.where(world_vector == AGENT)[0][0]
        # Keep only the selected goal; blank out all other 'g' cells.
        for i, value in enumerate(world_vector):
            if value == GOAL and i != self.goal_index:
                world_vector[i] = EMPTY
        world_vector[self.goal_index] = GOAL
        return world_vector

    def step(self, action):
        """Apply `action` and return whether the resulting state is terminal.

        Moves blocked by a wall or the grid boundary leave the state
        unchanged (the step still counts against the budget).

        Raises ValueError for an action not in self.possible_actions
        (the original fell through to an undefined-variable NameError).
        """
        if action not in self.possible_actions:
            raise ValueError('unknown action: %r' % (action,))
        self.num_steps += 1
        if action == 'stay':
            return self.is_terminal()
        if action == 'up':
            new_index = self.agent_index - 10
            valid = new_index >= 0
        elif action == 'down':
            new_index = self.agent_index + 10
            valid = new_index <= 99
        else:  # 'right' or 'left'
            new_index = self.agent_index + (1 if action == 'right' else -1)
            # Horizontal moves must stay on the same row. Floor division (//)
            # is required here: int(-1 / 10) truncates toward zero to 0, which
            # let a 'left' move from column 0 of row 0 wrap to index -1.
            valid = new_index >= 0 and new_index // 10 == self.agent_index // 10
        if valid and self.state[new_index] != WALL:
            self.state[self.agent_index] = EMPTY
            self.state[new_index] = AGENT
            self.agent_index = new_index
        return self.is_terminal()

    def reached_goal(self):
        """Return True iff the agent occupies the goal cell."""
        return self.agent_index == self.goal_index

    def is_terminal(self):
        """Terminal when the agent reached the goal or the step budget ran out."""
        if self.agent_index == self.goal_index:
            return True
        return self.num_steps >= self.max_steps