Add PPO inference (#30)

yunjhongwu · Nov 24, 2023 · 9f2eebc
1 parent 0f5a774
commit 9f2eebc
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 4 deletions.
diff --git a/burn-rl/src/agent/ppo/agent.rs b/burn-rl/src/agent/ppo/agent.rs
@@ -25,8 +25,7 @@ impl<E: Environment, B: Backend, M: PPOModel<B>> Agent<E> for PPO<E, B, M> {
             self.model
                 .as_ref()
                 .unwrap()
-                .forward(to_state_tensor(*state).unsqueeze())
-                .policies,
+                .inference(to_state_tensor(*state).unsqueeze()),
         )
     }
 }

diff --git a/burn-rl/src/agent/ppo/model.rs b/burn-rl/src/agent/ppo/model.rs
@@ -13,4 +13,6 @@ impl<B: Backend> PPOOutput<B> {
     }
 }
 
-pub trait PPOModel<B: Backend>: Model<B, Tensor<B, 2>, PPOOutput<B>> {}
+pub trait PPOModel<B: Backend>: Model<B, Tensor<B, 2>, PPOOutput<B>> {
+    fn inference(&self, input: Tensor<B, 2>) -> Tensor<B, 2>; // returns a bare tensor, not a PPOOutput
+}
diff --git a/examples/src/ppo.rs b/examples/src/ppo.rs
@@ -42,7 +42,12 @@ impl<B: Backend> Model<B, Tensor<B, 2>, PPOOutput<B>> for Net<B> {
     }
 }
 
-impl<B: Backend> PPOModel<B> for Net<B> {}
+impl<B: Backend> PPOModel<B> for Net<B> {
+    fn inference(&self, input: Tensor<B, 2>) -> Tensor<B, 2> { // actor head only
+        let layer_0_output = relu(self.linear.forward(input));
+        softmax(self.linear_actor.forward(layer_0_output), 1) // last use: move, no clone
+    }
+}
 
 #[allow(unused)]
 const MEMORY_SIZE: usize = 512;