diff --git a/pfrl/agents/ddpg.py b/pfrl/agents/ddpg.py
index 08c0748da..9319f5475 100644
--- a/pfrl/agents/ddpg.py
+++ b/pfrl/agents/ddpg.py
@@ -168,7 +168,7 @@ def compute_critic_loss(self, batch):
         loss = F.mse_loss(target_q, predict_q)
 
         # Update stats
-        self.critic_loss_record.append(float(loss.detach().cpu().numpy()))
+        self.critic_loss_record.append(loss.item())
 
         return loss
 
@@ -182,7 +182,7 @@ def compute_actor_loss(self, batch):
 
         # Update stats
         self.q_record.extend(q.detach().cpu().numpy())
-        self.actor_loss_record.append(float(loss.detach().cpu().numpy()))
+        self.actor_loss_record.append(loss.item())
 
         return loss
 
diff --git a/pfrl/agents/soft_actor_critic.py b/pfrl/agents/soft_actor_critic.py
index 75e8ce98a..57f76ae94 100644
--- a/pfrl/agents/soft_actor_critic.py
+++ b/pfrl/agents/soft_actor_critic.py
@@ -246,8 +246,8 @@ def update_q_func(self, batch):
         # Update stats
         self.q1_record.extend(predict_q1.detach().cpu().numpy())
         self.q2_record.extend(predict_q2.detach().cpu().numpy())
-        self.q_func1_loss_record.append(float(loss1))
-        self.q_func2_loss_record.append(float(loss2))
+        self.q_func1_loss_record.append(loss1.item())
+        self.q_func2_loss_record.append(loss2.item())
 
         self.q_func1_optimizer.zero_grad()
         loss1.backward()
diff --git a/pfrl/agents/td3.py b/pfrl/agents/td3.py
index dc913f56d..e70e7e98b 100644
--- a/pfrl/agents/td3.py
+++ b/pfrl/agents/td3.py
@@ -213,8 +213,8 @@ def update_q_func(self, batch):
         # Update stats
         self.q1_record.extend(predict_q1.detach().cpu().numpy())
         self.q2_record.extend(predict_q2.detach().cpu().numpy())
-        self.q_func1_loss_record.append(float(loss1))
-        self.q_func2_loss_record.append(float(loss2))
+        self.q_func1_loss_record.append(loss1.item())
+        self.q_func2_loss_record.append(loss2.item())
 
         self.q_func1_optimizer.zero_grad()
         loss1.backward()
@@ -241,7 +241,7 @@ def update_policy(self, batch):
         # Since we want to maximize Q, loss is negation of Q
         loss = -torch.mean(q)
 
-        self.policy_loss_record.append(float(loss))
+        self.policy_loss_record.append(loss.item())
         self.policy_optimizer.zero_grad()
         loss.backward()
         if self.max_grad_norm is not None:
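
For reference (not part of the patch): a minimal standalone sketch of why the replaced expressions are interchangeable for scalar losses. torch.Tensor.item() returns the value of a single-element tensor as a plain Python number, so both the explicit detach/cpu/numpy round trip and the implicit float(...) cast reduce to the same thing, with .item() being the single idiomatic call.

import torch
import torch.nn.functional as F

# A 0-dim loss tensor, as produced by F.mse_loss with default reduction.
loss = F.mse_loss(torch.zeros(4), torch.ones(4))

via_numpy = float(loss.detach().cpu().numpy())  # old pattern in ddpg.py
via_cast = float(loss)                          # old pattern in sac/td3 (calls .item() implicitly)
via_item = loss.item()                          # new pattern

assert via_numpy == via_cast == via_item

A side benefit of .item() is that it raises a clear error if the tensor ever holds more than one element, instead of silently depending on NumPy's scalar-conversion behavior.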