From 603ef2e5ea72999d2ed59a9ecb516c7ef4af656a Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Tue, 27 Aug 2019 15:10:10 -0400 Subject: [PATCH] trainer.py standard push grasp reward schedule --- trainer.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/trainer.py b/trainer.py index 69eb59a5..e72a7e83 100755 --- a/trainer.py +++ b/trainer.py @@ -19,22 +19,19 @@ def __init__(self, method, push_rewards, future_reward_discount, self.method = method self.place = place - # if self.place: - # self.push_reward = 0.0625 - # self.grasp_reward = 0.125 - # self.grasp_color_reward = 0.25 - # self.place_reward = 0.5 - # self.place_color_reward = 1.0 - # else: - # self.push_reward = 0.5 - # self.grasp_reward = 1.0 - - reward_schedule = (np.arange(5)**2/(2*np.max(np.arange(5)**2)))+0.5 - self.push_reward = reward_schedule[0] - self.grasp_reward = reward_schedule[1] - self.grasp_color_reward = reward_schedule[2] - self.place_reward = reward_schedule[3] - self.place_color_reward = reward_schedule[4] + if self.place: + # Stacking Reward Schedule + reward_schedule = (np.arange(5)**2/(2*np.max(np.arange(5)**2)))+0.5 + self.push_reward = reward_schedule[0] + self.grasp_reward = reward_schedule[1] + self.grasp_color_reward = reward_schedule[2] + self.place_reward = reward_schedule[3] + self.place_color_reward = reward_schedule[4] + else: + # Push Grasp Reward Schedule + self.push_reward = 0.5 + self.grasp_reward = 1.0 + # Check if CUDA can be used if torch.cuda.is_available() and not force_cpu: