#!/usr/bin/env python
# +-----------------------------------+-----------------------------------+
# |                               RL-ROBOT                                |
# |                                                                       |
# | Copyright (c) 2016, Individual contributors, see AUTHORS file.        |
# | Machine Perception and Intelligent Robotics (MAPIR),                  |
# | University of Malaga. <http://mapir.isa.uma.es>                       |
# |                                                                       |
# | This program is free software: you can redistribute it and/or modify  |
# | it under the terms of the GNU General Public License as published by  |
# | the Free Software Foundation, either version 3 of the License, or     |
# | (at your option) any later version.                                   |
# |                                                                       |
# | This program is distributed in the hope that it will be useful,       |
# | but WITHOUT ANY WARRANTY; without even the implied warranty of        |
# | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          |
# | GNU General Public License for more details.                          |
# |                                                                       |
# | You should have received a copy of the GNU General Public License     |
# | along with this program. If not, see <http://www.gnu.org/licenses/>.  |
# +-----------------------------------------------------------------------+
""" RL-ROBOT main script """
import signal
import sys
import time
from shutil import copyfile

import numpy as np

import exp

tasks_path = "tasks/"
results_path = "results/"


def run():
    """Perform experiments: setups, executions, save results"""
    if sys.version_info[0] < 3:
        sys.exit("Sorry, Python 3 required")
    exp.check()  # Check experiment parameters

    # Copy the selected task file to speed up the execution:
    try:
        copyfile("tasks/" + exp.TASK_ID + ".py", "task.py")
    except OSError:
        sys.exit("Task " + exp.TASK_ID + " not found. Please check exp.TASK_ID")
    import lp
    import robot
    import save
    import show
    import task

    task.setup()

    caption = exp.TASK_ID + "_" + exp.ALGORITHM + "_" + exp.ACTION_STRATEGY
    if exp.SUFFIX:
        caption += "_" + exp.SUFFIX
    save.new_dir(results_path, caption)  # create result directory
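    # "caption" doubles as the name of the results directory and as the
    # label shown by show.process_count() below.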
    epi = 0
    # Average Reward per step (aveR):
    ave_r = np.zeros((exp.N_REPETITIONS, exp.N_STEPS))
    # Mean(aveR) of all tests per step:
    mean_ave_r = np.zeros(exp.N_STEPS)
    # AveR per episode:
    epi_ave_r = np.zeros([exp.N_REPETITIONS, exp.N_EPISODES])
    # Actual step time per episode (for computational cost only):
    actual_step_time = np.zeros(exp.N_REPETITIONS)
    if exp.LEARN_FROM_MODEL:
        import model
        file_model = tasks_path + exp.FILE_MODEL + "/" + exp.FILE_MODEL
        model.load(file_model, exp.N_EPISODES_MODEL)
    else:
        robot.connect()  # Connect to V-REP / ROS

    if exp.CONTINUE_PREVIOUS_EXP:
        prev_exp = __import__(exp.PREVIOUS_EXP_FILE)
        print("NOTE: Continue experiments from: " + exp.PREVIOUS_EXP_FILE)
        time.sleep(3)
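    # prev_exp is expected to expose q, v, policy and q_count (presumably
    # exported by a previous run); they seed the learning process below.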
    # Experiment repetition loop ------------------------------------------
    for rep in range(exp.N_REPETITIONS):
        if exp.CONTINUE_PREVIOUS_EXP:
            last_q, last_v = prev_exp.q, prev_exp.v
            last_policy, last_q_count = prev_exp.policy, prev_exp.q_count
        else:
            last_q = last_v = last_policy = last_q_count = None

        # Episode loop ------------------
        for epi in range(exp.N_EPISODES):
            if exp.LEARN_FROM_MODEL:
                print("Learning from Model")
                task.STEP_TIME = 0
                lp.step_time = 0
            else:
                robot.start()

            show.process_count(caption, rep, epi, exp.EPISODIC)

            lp.setup()  # Learning process setup
            if (exp.EPISODIC and epi > 0) or exp.CONTINUE_PREVIOUS_EXP:
                lp.q, lp.v = last_q, last_v
                lp.policy, lp.q_count = last_policy, last_q_count
            lp.run()  # Execute the learning process

            if not exp.LEARN_FROM_MODEL:
                robot.stop()

            ave_r[rep] = lp.ave_r_step
            actual_step_time[rep] = lp.actual_step_time

            if exp.EPISODIC:
                last_q, last_v = lp.q, lp.v
                last_policy, last_q_count = lp.policy, lp.q_count
                epi_ave_r[rep, epi] = lp.ave_r_step[lp.step]

            if exp.EXPORT_SASR_step:
                save.simple(lp.sasr_step, "SASR_step")
        # end of episode

        show.process_remaining(rep, epi)
        mean_ave_r = np.mean(ave_r, axis=0)
    # End of experiment repetition loop ----------------------------

    # Mean of AveR per step (last episode):
    save.plot_mean(mean_ave_r, epi)

    save.simple(ave_r, "aveR")
    # If EPISODIC: save ave_r of the last episode
    if exp.EPISODIC:
        # Mean of AveR reached (last step) per episode:
        mean_epi_ave_r = np.mean(epi_ave_r, axis=0)
        save.plot_mean(mean_epi_ave_r, "ALL")
        save.simple(epi_ave_r, "EPI")

    final_r = mean_ave_r[lp.step]
    final_actual_step_time = np.mean(actual_step_time)
    save.log(final_r, final_actual_step_time)
    save.arrays()
    print("Mean average Reward = %0.2f" % final_r, "\n")
    print("Mean actual step time (s): %0.6f" % final_actual_step_time, "\n")

    if not exp.LEARN_FROM_MODEL:
        robot.disconnect()

# ------------------------------------------------------------------------------
def signal_handler(sig, _):
    """Capture the Ctrl-C event and exit"""
    sys.exit("\n Process interrupted (signal " + str(sig) + ")")


# capture SIGINT when ROS threads are present:
signal.signal(signal.SIGINT, signal_handler)
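# Note: the handler is registered at import time, so it is already active
# for both the ROS and non-ROS branches below.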
if __name__ == "__main__":
if exp.ENVIRONMENT_TYPE == "ROS":
print(" \n linking ROS ... ")
import rl_ros
rl_ros.setup() # create ROS talker and listener threads
time.sleep(0.1)
run()
rl_ros.finish() # finish ROS threads
else:
run()