diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 00fe9056..fa62b3c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,3 +101,9 @@ jobs: cargo test --example sac_pendulum_tch --features=tch cargo test --example dqn_cartpole --features=candle-core cargo test --example sac_pendulum --features=candle-core + cd border-async-trainer; cargo test; cd .. + cd border-atari-env; cargo test; cd .. + cd border-candle-agent; cargo test; cd .. + cd border-tch-agent; cargo test; cd .. + cd border-policy-no-backend; cargo test --features=border-tch-agent; cd .. + cd border-py-gym-env; cargo test; cd .. diff --git a/CHANGELOG.md b/CHANGELOG.md index abba777b..d912549b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,33 +4,35 @@ ### Added -* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2) -* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1) -* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12) +* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2). +* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1). +* Add `Trainer::train_offline()` method for offline training (`border-core`) (https://github.com/taku-y/border/issues/18). +* Add crate `border-policy-no-backend`. ### Changed -* Take `self` in the signature of `push()` method of replay buffer (`border-core`) -* Fix a bug in `MlpConfig` (`border-tch-agent`) -* Bump the version of tch to 0.16.0 (`border-tch-agent`) -* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`) -* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10) +* Take `self` in the signature of `push()` method of replay buffer (`border-core`). +* Fix a bug in `MlpConfig` (`border-tch-agent`). +* Bump the version of tch to 0.16.0 (`border-tch-agent`). +* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`). +* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10). +* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12). ## v0.0.6 (2023-09-19) ### Added * Docker files (`border`). -* Singularity files (`border`) -* Script for GPUSOROBAN (#67) +* Singularity files (`border`). +* Script for GPUSOROBAN (#67). * `Evaluator` trait in `border-core` (#70). It can be used to customize evaluation logic in `Trainer`. * Example of asynchronous trainer for native Atari environment and DQN (`border/examples`). -* Move tensorboard recorder into a separate crate (`border-tensorboard`) +* Move tensorboard recorder into a separate crate (`border-tensorboard`). ### Changed * Bump the version of tch-rs to 0.8.0 (`border-tch-agent`). * Rename agents as following the convention in Rust (`border-tch-agent`). -* Bump the version of gym to 0.26 (`border-py-gym-env`) -* Remove the type parameter for array shape of gym environments (`border-py-gym-env`) -* Interface of Python-Gym interface (`border-py-gym-env`) +* Bump the version of gym to 0.26 (`border-py-gym-env`). +* Remove the type parameter for array shape of gym environments (`border-py-gym-env`). +* Interface of Python-Gym interface (`border-py-gym-env`). 
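The `Policy`/`Configurable` split listed in the changelog above separates action sampling from agent construction. Below is a minimal sketch of how the two traits combine, modeled on the `RandomPolicy` doctest in `border-atari-env` later in this diff; the blanket `E: Env` impl is illustrative, and `Configurable` is assumed not to be parameterized by the environment, which this diff does not confirm:

```rust
use border_core::{Configurable, Env, Policy};

/// Samples actions uniformly at random from `n_acts` discrete actions.
struct RandomPolicy {
    n_acts: usize,
}

#[derive(Clone)]
struct RandomPolicyConfig {
    n_acts: usize,
}

// Construction from a config lives in the `Configurable` trait.
impl Configurable for RandomPolicy {
    type Config = RandomPolicyConfig;

    fn build(config: Self::Config) -> Self {
        Self {
            n_acts: config.n_acts,
        }
    }
}

// Sampling is all that `Policy` requires after the split.
impl<E: Env> Policy<E> for RandomPolicy
where
    E::Act: From<u8>,
{
    fn sample(&mut self, _obs: &E::Obs) -> E::Act {
        fastrand::u8(..self.n_acts as u8).into()
    }
}
```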
diff --git a/Cargo.toml b/Cargo.toml index 02011f64..551830af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "border-derive", "border-atari-env", "border-async-trainer", + "border-policy-no-backend", "border", ] exclude = ["docker/"] @@ -49,9 +50,10 @@ segment-tree = "2.0.0" image = "0.23.14" candle-core = { version = "=0.4.1", feature = ["cuda", "cudnn"] } candle-nn = "0.4.1" -rand = "0.8.5" +rand = { version = "0.8.5", features = ["small_rng"] } itertools = "0.12.1" ordered-float = "4.2.0" reqwest = { version = "0.11.26", features = ["json", "blocking"] } xxhash-rust = { version = "0.8.10", features = ["xxh3"] } -candle-optimisers = "0.4.0" \ No newline at end of file +candle-optimisers = "0.4.0" +bincode = "1.3.3" diff --git a/border-async-trainer/src/actor/base.rs b/border-async-trainer/src/actor/base.rs index 297a6f4f..95d0fdb3 100644 --- a/border-async-trainer/src/actor/base.rs +++ b/border-async-trainer/src/actor/base.rs @@ -10,7 +10,7 @@ use std::{ }; #[cfg_attr(doc, aquamarine::aquamarine)] -/// Runs interaction between an [`Agent`] and an [`Env`], then generates transitions. +/// Generate transitions by running [`Agent`] in [`Env`]. /// /// ```mermaid /// flowchart TB @@ -23,15 +23,18 @@ use std::{ /// C-->|ReplayBufferBase::PushedItem|F[ReplayBufferProxy] /// ``` /// -/// This diagram shows interaction of [`Agent`], [`Env`] and [`StepProcessor`], -/// as shown in [`border_core::Trainer`]. However, this diagram also shows that +/// In [`Actor`], an [`Agent`] runs on an [`Env`] and generates [`Step`] objects. +/// These objects are processed with [`StepProcessor`] and sent to [`ReplayBufferProxy`]. /// The [`Agent`] in the [`Actor`] periodically synchronizes with the [`Agent`] in -/// [`AsyncTrainer`] via [`SyncModel::ModelInfo`], and the transitions generated by -/// [`StepProcessor`] are sent to the [`ReplayBufferProxy`]. +/// [`AsyncTrainer`] via [`SyncModel::ModelInfo`]. /// /// See also the diagram in [`AsyncTrainer`]. /// /// [`AsyncTrainer`]: crate::AsyncTrainer +/// [`Agent`]: border_core::Agent +/// [`Env`]: border_core::Env +/// [`StepProcessor`]: border_core::StepProcessor +/// [`Step`]: border_core::Step pub struct Actor where A: Agent + Configurable + SyncModel, @@ -70,6 +73,7 @@ where env_seed: i64, stats: Arc>>, ) -> Self { + log::info!("Create actor {}", id); Self { id, stop, diff --git a/border-async-trainer/src/actor/stat.rs b/border-async-trainer/src/actor/stat.rs index bc989ffc..3fb26199 100644 --- a/border-async-trainer/src/actor/stat.rs +++ b/border-async-trainer/src/actor/stat.rs @@ -1,12 +1,12 @@ use std::time::Duration; -/// Stats of sampling process in each [`Actor`](crate::Actor). +/// Stats of sampling process in an [`Actor`](crate::Actor). #[derive(Clone, Debug)] pub struct ActorStat { /// The number of steps for interaction between agent and env. pub env_steps: usize, - /// Duration of sampling loop in [`Actor`](crate::Actor). + /// Duration of sampling loop in the [`Actor`](crate::Actor). pub duration: Duration, } diff --git a/border-async-trainer/src/actor_manager.rs b/border-async-trainer/src/actor_manager.rs index 60304b11..546371ff 100644 --- a/border-async-trainer/src/actor_manager.rs +++ b/border-async-trainer/src/actor_manager.rs @@ -1,4 +1,4 @@ -//! A manager of [Actor]()s. +//! A manager of [`Actor`](crate::Actor)s. 
mod base; mod config; pub use base::ActorManager; diff --git a/border-async-trainer/src/actor_manager/base.rs b/border-async-trainer/src/actor_manager/base.rs index cbfee205..2170921b 100644 --- a/border-async-trainer/src/actor_manager/base.rs +++ b/border-async-trainer/src/actor_manager/base.rs @@ -15,9 +15,11 @@ use std::{ /// Manages [`Actor`]s. /// /// This struct handles the following requests: -/// * From the [LearnerManager]() for updating the latest model info, stored in this struct. +/// * From the [`AsyncTrainer`] for updating the latest model info, stored in this struct. /// * From the [`Actor`]s for getting the latest model info. /// * From the [`Actor`]s for pushing sample batch to the `LearnerManager`. +/// +/// [`AsyncTrainer`]: crate::AsyncTrainer pub struct ActorManager where A: Agent + Configurable + SyncModel, @@ -25,10 +27,10 @@ where E: Env, P: StepProcessor, R: ExperienceBufferBase + ReplayBufferBase, { - /// Configurations of [Agent]s. + /// Configurations of [`Agent`]s. agent_configs: Vec<A::Config>, - /// Configuration of [Env]. + /// Configuration of [`Env`]. env_config: E::Config, /// Configuration of a `StepProcessor`. @@ -77,7 +79,7 @@ where R::Item: Send + 'static, A::ModelInfo: Send + 'static, { - /// Builds a [ActorManager]. + /// Builds an [`ActorManager`]. pub fn build( config: &ActorManagerConfig, agent_configs: &Vec<A::Config>, @@ -103,10 +105,10 @@ } } - /// Runs threads for [Actor]s and a thread for sending samples into the replay buffer. + /// Runs threads for [`Actor`]s and a thread for sending samples into the replay buffer. /// - /// A thread will wait for the initial [SyncModel::ModelInfo] from [AsyncTrainer](crate::AsyncTrainer), - /// which blocks execution of [Actor] threads. + /// Each thread is blocked until receiving the initial [`SyncModel::ModelInfo`] + /// from [`AsyncTrainer`](crate::AsyncTrainer). pub fn run(&mut self, guard_init_env: Arc<Mutex<bool>>) { // Guard for sync of the initial model let guard_init_model = Arc::new(Mutex::new(true)); @@ -220,10 +222,10 @@ where // TODO: error handling, timeout // TODO: caching // TODO: stats - let msg = receiver.recv().unwrap(); - _n_samples += 1; - sender.try_send(msg).unwrap(); - // println!("{:?}", (_msg.id, n_samples)); + if let Ok(msg) = receiver.recv() { + _n_samples += 1; + sender.try_send(msg).unwrap(); + } // Stop the loop if *stop.lock().unwrap() { diff --git a/border-async-trainer/src/async_trainer/base.rs b/border-async-trainer/src/async_trainer/base.rs index 2a16f5de..ffcc001d 100644 --- a/border-async-trainer/src/async_trainer/base.rs +++ b/border-async-trainer/src/async_trainer/base.rs @@ -33,25 +33,26 @@ use std::{ /// end /// ``` /// -/// * In [`ActorManager`] (right), [`Actor`]s sample transitions, which have type -/// [`ReplayBufferBase::Item`], in parallel and push the transitions into -/// [`ReplayBufferProxy`]. It should be noted that [`ReplayBufferProxy`] has a -/// type parameter of [`ReplayBufferBase`] and the proxy accepts -/// [`ReplayBufferBase::Item`]. -/// * The proxy sends the transitions into the replay buffer, implementing -/// [`ReplayBufferBase`], in the [`AsyncTrainer`]. -/// * The [`Agent`] in [`AsyncTrainer`] trains its model parameters by using batches +/// * The [`Agent`] in [`AsyncTrainer`] (left) is trained with batches /// of type [`ReplayBufferBase::Batch`], which are taken from the replay buffer. /// * The model parameters of the [`Agent`] in [`AsyncTrainer`] are wrapped in /// [`SyncModel::ModelInfo`] and periodically sent to the [`Agent`]s in [`Actor`]s.
-/// [`Agent`] must implement [`SyncModel`] to synchronize its model. +/// [`Agent`] must implement [`SyncModel`] to synchronize the model parameters. +/// * In [`ActorManager`] (right), [`Actor`]s sample transitions, which have type +/// [`ReplayBufferBase::Item`], and push the transitions into +/// [`ReplayBufferProxy`]. +/// * [`ReplayBufferProxy`] has a type parameter of [`ReplayBufferBase`] and the proxy accepts +/// [`ReplayBufferBase::Item`]. +/// * The proxy sends the transitions into the replay buffer in the [`AsyncTrainer`]. /// /// [`ActorManager`]: crate::ActorManager /// [`Actor`]: crate::Actor /// [`ReplayBufferBase::Item`]: border_core::ReplayBufferBase::PushedItem +/// [`ReplayBufferBase::Batch`]: border_core::ReplayBufferBase::PushedBatch /// [`ReplayBufferProxy`]: crate::ReplayBufferProxy /// [`ReplayBufferBase`]: border_core::ReplayBufferBase /// [`SyncModel::ModelInfo`]: crate::SyncModel::ModelInfo +/// [`Agent`]: border_core::Agent pub struct AsyncTrainer where A: Agent + Configurable + SyncModel, @@ -266,11 +267,8 @@ }; let mut agent = A::build(self.agent_config.clone()); let mut buffer = R::build(&self.replay_buffer_config); - // let buffer = Arc::new(Mutex::new(R::build(&self.replay_buffer_config))); agent.train(); - // self.run_replay_buffer_thread(buffer.clone()); - let mut max_eval_reward = f32::MIN; let mut opt_steps = 0; let mut samples = 0; @@ -294,7 +292,6 @@ // Add stats wrt computation cost if opt_steps % self.record_compute_cost_interval == 0 { - // record.insert("fps", Scalar(sampler.fps())); record.insert("opt_steps_per_sec", Scalar(self.opt_steps_per_sec())); } diff --git a/border-async-trainer/src/async_trainer/config.rs b/border-async-trainer/src/async_trainer/config.rs index 48498e36..95f5b115 100644 --- a/border-async-trainer/src/async_trainer/config.rs +++ b/border-async-trainer/src/async_trainer/config.rs @@ -6,7 +6,7 @@ use std::{ path::Path, }; -/// Configuration of [AsyncTrainer](crate::AsyncTrainer) +/// Configuration of [`AsyncTrainer`](crate::AsyncTrainer). #[derive(Clone, Debug, Deserialize, Serialize)] pub struct AsyncTrainerConfig { /// The maximum number of optimization steps. @@ -56,3 +56,19 @@ impl AsyncTrainerConfig { Ok(self) } } + +impl Default for AsyncTrainerConfig { + /// There is no special intention behind these initial values. + fn default() -> Self { + Self { + max_opts: 10_000, + model_dir: None, + eval_interval: 5000, + flush_record_interval: 5000, + record_compute_cost_interval: 5000, + save_interval: 50000, + sync_interval: 100, + warmup_period: 10000, + } + } +} diff --git a/border-async-trainer/src/lib.rs b/border-async-trainer/src/lib.rs index d6747291..ce775418 100644 --- a/border-async-trainer/src/lib.rs +++ b/border-async-trainer/src/lib.rs @@ -2,61 +2,126 @@ //! //! The code might look like below. //! -//! ```ignore -//! fn train() { -//! let agent_configs: Vec<_> = vec![agent_config()]; -//! let env_config_train = env_config(name); -//! let env_config_eval = env_config(name).eval(); -//! let replay_buffer_config = load_replay_buffer_config(model_dir.as_str())?; -//! let step_proc_config = SimpleStepProcessorConfig::default(); -//! let actor_man_config = ActorManagerConfig::default(); -//! let async_trainer_config = load_async_trainer_config(model_dir.as_str())?; -//! let mut recorder = TensorboardRecorder::new(model_dir); -//! let mut evaluator = Evaluator::new(&env_config_eval, 0, 1)?; -//! -//! // Shared flag to stop actor threads -//! let stop = Arc::new(Mutex::new(false)); +//! ```
+//! # use serde::{Deserialize, Serialize}; +//! # use border_core::test::{ +//! # TestAgent, TestAgentConfig, TestEnv, TestObs, TestObsBatch, +//! # TestAct, TestActBatch +//! # }; +//! # use border_async_trainer::{ +//! # ActorManager, ActorManagerConfig, AsyncTrainer, AsyncTrainerConfig, +//! # }; +//! # use border_core::{ +//! # generic_replay_buffer::{ +//! # SimpleReplayBuffer, SimpleReplayBufferConfig, +//! # SimpleStepProcessorConfig, SimpleStepProcessor +//! # }, +//! # record::{AggregateRecorder, NullRecorder}, DefaultEvaluator, +//! # }; +//! # +//! # fn agent_config() -> TestAgentConfig { +//! # TestAgentConfig +//! # } +//! # +//! # fn env_config() -> usize { +//! # 0 +//! # } +//! +//! type Env = TestEnv; +//! type ObsBatch = TestObsBatch; +//! type ActBatch = TestActBatch; +//! type ReplayBuffer = SimpleReplayBuffer<ObsBatch, ActBatch>; +//! type StepProcessor = SimpleStepProcessor<Env, ObsBatch, ActBatch>; +//! +//! // Create a new agent by wrapping the existing agent in order to implement SyncModel. +//! struct TestAgent2(TestAgent); +//! +//! impl border_core::Configurable for TestAgent2 { +//! type Config = TestAgentConfig; +//! +//! fn build(config: Self::Config) -> Self { +//! Self(TestAgent::build(config)) +//! } +//! } +//! +//! impl border_core::Agent<Env, ReplayBuffer> for TestAgent2 { +//! // Boilerplate code to delegate the method calls to the inner agent. +//! fn train(&mut self) { +//! self.0.train(); +//! } //! -//! // Creates channels -//! let (item_s, item_r) = unbounded(); // items pushed to replay buffer -//! let (model_s, model_r) = unbounded(); // model_info +//! // For other methods ... +//! # fn is_train(&self) -> bool { +//! # self.0.is_train() +//! # } +//! # +//! # fn eval(&mut self) { +//! # self.0.eval(); +//! # } +//! # +//! # fn opt_with_record(&mut self, buffer: &mut ReplayBuffer) -> border_core::record::Record { +//! # self.0.opt_with_record(buffer) +//! # } +//! # +//! # fn save_params<T: AsRef<std::path::Path>>(&self, path: T) -> anyhow::Result<()> { +//! # self.0.save_params(path) +//! # } +//! # +//! # fn load_params<T: AsRef<std::path::Path>>(&mut self, path: T) -> anyhow::Result<()> { +//! # self.0.load_params(path) +//! # } +//! # +//! # fn opt(&mut self, buffer: &mut ReplayBuffer) { +//! # self.0.opt_with_record(buffer); +//! # } +//! } //! -//! // guard for initialization of envs in multiple threads -//! let guard_init_env = Arc::new(Mutex::new(true)); +//! impl border_core::Policy<Env> for TestAgent2 { +//! // Boilerplate code to delegate the method calls to the inner agent. +//! // ... +//! # fn sample(&mut self, obs: &TestObs) -> TestAct { +//! # self.0.sample(obs) +//! # } +//! } +//! +//! impl border_async_trainer::SyncModel for TestAgent2 { +//! // Self::ModelInfo should include the model parameters. +//! type ModelInfo = usize; +//! //! -//! // Actor manager and async trainer -//! let mut actors = ActorManager::build( -//! &actor_man_config, -//! &agent_configs, -//! &env_config_train, -//! &step_proc_config, -//! item_s, -//! model_r, -//! stop.clone(), -//! ); -//! let mut trainer = AsyncTrainer::build( -//! &async_trainer_config, -//! &agent_config, -//! &env_config_eval, -//! &replay_buffer_config, -//! item_r, -//! model_s, -//! stop.clone(), -//! ); +//! fn model_info(&self) -> (usize, Self::ModelInfo) { +//! // Extracts the model parameters and returns them as Self::ModelInfo. +//! // The first element of the tuple is the number of optimization steps. +//! (0, 0) +//! } //! -//! // Set the number of threads -//! tch::set_num_threads(1); +//!
fn sync_model(&mut self, _model_info: &Self::ModelInfo) { +//! // implements synchronization of the model based on the _model_info +//! } +//! } //! -//! // Starts sampling and training -//! actors.run(guard_init_env.clone()); -//! let stats = trainer.train(&mut recorder, &mut evaluator, guard_init_env); -//! println!("Stats of async trainer"); -//! println!("{}", stats.fmt()); +//! let agent_configs: Vec<_> = vec![agent_config()]; +//! let env_config_train = env_config(); +//! let env_config_eval = env_config(); +//! let replay_buffer_config = SimpleReplayBufferConfig::default(); +//! let step_proc_config = SimpleStepProcessorConfig::default(); +//! let actor_man_config = ActorManagerConfig::default(); +//! let async_trainer_config = AsyncTrainerConfig::default(); +//! let mut recorder: Box = Box::new(NullRecorder {}); +//! let mut evaluator = DefaultEvaluator::::new(&env_config_eval, 0, 1).unwrap(); //! -//! let stats = actors.stop_and_join(); -//! println!("Stats of generated samples in actors"); -//! println!("{}", actor_stats_fmt(&stats)); -//! } +//! border_async_trainer::util::train_async::<_, _, _, StepProcessor>( +//! &agent_config(), +//! &agent_configs, +//! &env_config_train, +//! &env_config_eval, +//! &step_proc_config, +//! &replay_buffer_config, +//! &actor_man_config, +//! &async_trainer_config, +//! &mut recorder, +//! &mut evaluator, +//! ); //! ``` //! //! Training process consists of the following two components: @@ -89,6 +155,7 @@ mod messages; mod replay_buffer_proxy; mod sync_model; pub mod util; + pub use actor::{actor_stats_fmt, Actor, ActorStat}; pub use actor_manager::{ActorManager, ActorManagerConfig}; pub use async_trainer::{AsyncTrainStat, AsyncTrainer, AsyncTrainerConfig}; @@ -96,3 +163,226 @@ pub use error::BorderAsyncTrainerError; pub use messages::PushedItemMessage; pub use replay_buffer_proxy::{ReplayBufferProxy, ReplayBufferProxyConfig}; pub use sync_model::SyncModel; + +/// Agent and Env for testing. +#[cfg(test)] +pub mod test { + use serde::{Deserialize, Serialize}; + + /// Obs for testing. + #[derive(Clone, Debug)] + pub struct TestObs { + obs: usize, + } + + impl border_core::Obs for TestObs { + fn dummy(_n: usize) -> Self { + Self { obs: 0 } + } + + fn len(&self) -> usize { + 1 + } + } + + /// Batch of obs for testing. + pub struct TestObsBatch { + obs: Vec, + } + + impl border_core::generic_replay_buffer::BatchBase for TestObsBatch { + fn new(capacity: usize) -> Self { + Self { + obs: vec![0; capacity], + } + } + + fn push(&mut self, i: usize, data: Self) { + self.obs[i] = data.obs[0]; + } + + fn sample(&self, ixs: &Vec) -> Self { + let obs = ixs.iter().map(|ix| self.obs[*ix]).collect(); + Self { obs } + } + } + + impl From for TestObsBatch { + fn from(obs: TestObs) -> Self { + Self { obs: vec![obs.obs] } + } + } + + /// Act for testing. + #[derive(Clone, Debug)] + pub struct TestAct { + act: usize, + } + + impl border_core::Act for TestAct {} + + /// Batch of act for testing. + pub struct TestActBatch { + act: Vec, + } + + impl From for TestActBatch { + fn from(act: TestAct) -> Self { + Self { act: vec![act.act] } + } + } + + impl border_core::generic_replay_buffer::BatchBase for TestActBatch { + fn new(capacity: usize) -> Self { + Self { + act: vec![0; capacity], + } + } + + fn push(&mut self, i: usize, data: Self) { + self.act[i] = data.act[0]; + } + + fn sample(&self, ixs: &Vec) -> Self { + let act = ixs.iter().map(|ix| self.act[*ix]).collect(); + Self { act } + } + } + + /// Info for testing. 
+ pub struct TestInfo {} + + impl border_core::Info for TestInfo {} + + /// Environment for testing. + pub struct TestEnv { + state_init: usize, + state: usize, + } + + impl border_core::Env for TestEnv { + type Config = usize; + type Obs = TestObs; + type Act = TestAct; + type Info = TestInfo; + + fn reset(&mut self, _is_done: Option<&Vec>) -> anyhow::Result { + self.state = self.state_init; + Ok(TestObs { obs: self.state }) + } + + fn reset_with_index(&mut self, _ix: usize) -> anyhow::Result { + self.state = self.state_init; + Ok(TestObs { obs: self.state }) + } + + fn step_with_reset( + &mut self, + a: &Self::Act, + ) -> (border_core::Step, border_core::record::Record) + where + Self: Sized, + { + self.state = self.state + a.act; + let step = border_core::Step { + obs: TestObs { obs: self.state }, + act: a.clone(), + reward: vec![0.0], + is_terminated: vec![0], + is_truncated: vec![0], + info: TestInfo {}, + init_obs: TestObs { + obs: self.state_init, + }, + }; + return (step, border_core::record::Record::empty()); + } + + fn step(&mut self, a: &Self::Act) -> (border_core::Step, border_core::record::Record) + where + Self: Sized, + { + self.state = self.state + a.act; + let step = border_core::Step { + obs: TestObs { obs: self.state }, + act: a.clone(), + reward: vec![0.0], + is_terminated: vec![0], + is_truncated: vec![0], + info: TestInfo {}, + init_obs: TestObs { + obs: self.state_init, + }, + }; + return (step, border_core::record::Record::empty()); + } + + fn build(config: &Self::Config, _seed: i64) -> anyhow::Result + where + Self: Sized, + { + Ok(Self { + state_init: *config, + state: 0, + }) + } + } + + type ReplayBuffer = + border_core::generic_replay_buffer::SimpleReplayBuffer; + + /// Agent for testing. + pub struct TestAgent {} + + #[derive(Clone, Deserialize, Serialize)] + /// Config of agent for testing. + pub struct TestAgentConfig; + + impl border_core::Agent for TestAgent { + fn train(&mut self) {} + + fn is_train(&self) -> bool { + false + } + + fn eval(&mut self) {} + + fn opt_with_record(&mut self, _buffer: &mut ReplayBuffer) -> border_core::record::Record { + border_core::record::Record::empty() + } + + fn save_params>(&self, _path: T) -> anyhow::Result<()> { + Ok(()) + } + + fn load_params>(&mut self, _path: T) -> anyhow::Result<()> { + Ok(()) + } + } + + impl border_core::Policy for TestAgent { + fn sample(&mut self, _obs: &TestObs) -> TestAct { + TestAct { act: 1 } + } + } + + impl border_core::Configurable for TestAgent { + type Config = TestAgentConfig; + + fn build(_config: Self::Config) -> Self { + Self {} + } + } + + impl crate::SyncModel for TestAgent { + type ModelInfo = usize; + + fn model_info(&self) -> (usize, Self::ModelInfo) { + (0, 0) + } + + fn sync_model(&mut self, _model_info: &Self::ModelInfo) { + // nothing to do + } + } +} diff --git a/border-async-trainer/src/replay_buffer_proxy.rs b/border-async-trainer/src/replay_buffer_proxy.rs index ccd263e0..263c5beb 100644 --- a/border-async-trainer/src/replay_buffer_proxy.rs +++ b/border-async-trainer/src/replay_buffer_proxy.rs @@ -9,7 +9,7 @@ use std::marker::PhantomData; pub struct ReplayBufferProxyConfig { /// Number of samples buffered until sent to the trainer. /// - /// Here, a sample corresponds to a `R::Item` for [`ReplayBufferProxy`]``. + /// A sample is a `R::Item` for [`ReplayBufferProxy`]``. 
pub n_buffer: usize, } diff --git a/border-atari-env/src/act.rs b/border-atari-env/src/act.rs index 9ee50316..6feae6bf 100644 --- a/border-atari-env/src/act.rs +++ b/border-atari-env/src/act.rs @@ -5,7 +5,9 @@ use serde::{Deserialize, Serialize}; use std::{default::Default, marker::PhantomData}; #[derive(Debug, Clone)] -/// Action for [BorderAtariEnv](crate::BorderAtariEnv) +/// Action for [`BorderAtariEnv`](crate::BorderAtariEnv). +/// +/// This action is a discrete action and denotes pushing a button. pub struct BorderAtariAct { pub act: u8, } @@ -28,7 +30,7 @@ impl From for BorderAtariAct { } } -/// Converts `A` to [`BorderAtariAct`]. +/// Converts action of type `A` to [`BorderAtariAct`]. pub trait BorderAtariActFilter { /// Configuration of the filter. type Config: Clone + Default; @@ -56,7 +58,7 @@ impl Default for BorderAtariActRawFilterConfig { } } -/// A filter without any processing. +/// A filter that performs no processing. pub struct BorderAtariActRawFilter { phantom: PhantomData, } diff --git a/border-atari-env/src/atari_env.rs b/border-atari-env/src/atari_env.rs index ec4d0758..b2731cd2 100644 --- a/border-atari-env/src/atari_env.rs +++ b/border-atari-env/src/atari_env.rs @@ -1,3 +1,4 @@ +//! Atari environment for reinforcement learning. pub mod ale; use std::path::Path; diff --git a/border-atari-env/src/env/config.rs b/border-atari-env/src/env/config.rs index 1f410644..9abb742b 100644 --- a/border-atari-env/src/env/config.rs +++ b/border-atari-env/src/env/config.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use std::{default::Default, env}; #[derive(Serialize, Deserialize, Debug)] -/// Configurations of [`BorderAtariEnv`](super::BorderAtariEnv). +/// Configuration of [`BorderAtariEnv`](super::BorderAtariEnv). pub struct BorderAtariEnvConfig where O: Obs, diff --git a/border-atari-env/src/lib.rs b/border-atari-env/src/lib.rs index 0f2d8277..7d47560e 100644 --- a/border-atari-env/src/lib.rs +++ b/border-atari-env/src/lib.rs @@ -1,13 +1,13 @@ -//! A thin wrapper of [atari-env](https://crates.io/crates/atari-env) for [Border](https://crates.io/crates/border). +//! A thin wrapper of [`atari-env`](https://crates.io/crates/atari-env) for [`Border`](https://crates.io/crates/border). //! //! The code under [atari_env] is adapted from the -//! [atari-env](https://crates.io/crates/atari-env) crate +//! [`atari-env`](https://crates.io/crates/atari-env) crate //! (rev = `0ef0422f953d79e96b32ad14284c9600bd34f335`), //! because the crate registered in crates.io does not implement //! [`atari_env::AtariEnv::lives()`] method, which is required for episodic life environments. //! //! This environment applies some preprocessing to observation as in -//! [atari_wrapper.py](https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py). +//! [`atari_wrapper.py`](https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py). //! //! You need to place Atari Rom directories under the directory specified by environment variable //! `ATARI_ROM_DIR`. An easy way to do this is to use [AutoROM](https://pypi.org/project/AutoROM/) @@ -28,55 +28,50 @@ //! BorderAtariAct, BorderAtariActRawFilter, BorderAtariEnv, BorderAtariEnvConfig, //! BorderAtariObs, BorderAtariObsRawFilter, //! }; -//! use border_core::{util, Env as _, Policy, DefaultEvaluator, Evaluator as _}; -//! -//! type Obs = BorderAtariObs; -//! type Act = BorderAtariAct; -//! type ObsFilter = BorderAtariObsRawFilter; -//! type ActFilter = BorderAtariActRawFilter; -//! 
type EnvConfig = BorderAtariEnvConfig; -//! type Env = BorderAtariEnv; -//! -//! #[derive(Clone)] -//! struct RandomPolicyConfig { -//! pub n_acts: usize, -//! } -//! -//! struct RandomPolicy { -//! n_acts: usize, -//! } -//! -//! impl Policy for RandomPolicy { -//! type Config = RandomPolicyConfig; -//! -//! fn build(config: Self::Config) -> Self { -//! Self { -//! n_acts: config.n_acts, -//! } -//! } -//! -//! fn sample(&mut self, _: &Obs) -> Act { -//! fastrand::u8(..self.n_acts as u8).into() -//! } -//! } -//! -//! fn env_config(name: String) -> EnvConfig { -//! EnvConfig::default().name(name) -//! } -//! +//! use border_core::{Env as _, Policy, DefaultEvaluator, Evaluator as _}; +//! +//! # type Obs = BorderAtariObs; +//! # type Act = BorderAtariAct; +//! # type ObsFilter = BorderAtariObsRawFilter; +//! # type ActFilter = BorderAtariActRawFilter; +//! # type EnvConfig = BorderAtariEnvConfig; +//! # type Env = BorderAtariEnv; +//! # +//! # #[derive(Clone)] +//! # struct RandomPolicyConfig { +//! # pub n_acts: usize, +//! # } +//! # +//! # struct RandomPolicy { +//! # n_acts: usize, +//! # } +//! # +//! # impl RandomPolicy { +//! # pub fn build(n_acts: usize) -> Self { +//! # Self { n_acts } +//! # } +//! # } +//! # +//! # impl Policy for RandomPolicy { +//! # fn sample(&mut self, _: &Obs) -> Act { +//! # fastrand::u8(..self.n_acts as u8).into() +//! # } +//! # } +//! # +//! # fn env_config(name: String) -> EnvConfig { +//! # EnvConfig::default().name(name) +//! # } +//! # //! fn main() -> Result<()> { -//! env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); -//! fastrand::seed(42); -//! +//! # env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); +//! # fastrand::seed(42); +//! # //! // Creates Pong environment //! let env_config = env_config("pong".to_string()); //! //! // Creates a random policy -//! let n_acts = 4; // number of actions; -//! let policy_config = RandomPolicyConfig { -//! n_acts: n_acts as _, -//! }; -//! let mut policy = RandomPolicy::build(policy_config); +//! let n_acts = 4; +//! let mut policy = RandomPolicy::build(n_acts); //! //! // Runs evaluation //! let env_config = env_config.render(true); diff --git a/border-atari-env/src/obs.rs b/border-atari-env/src/obs.rs index f37e4ab0..2a5e4b59 100644 --- a/border-atari-env/src/obs.rs +++ b/border-atari-env/src/obs.rs @@ -67,7 +67,7 @@ impl From for Tensor { } } -/// Converts [`BorderAtariObs`] to `O` with an arbitrary processing. +/// Converts [`BorderAtariObs`] to observation of type `O` with an arbitrary processing. pub trait BorderAtariObsFilter { /// Configuration of the filter. type Config: Clone + Default; @@ -98,7 +98,7 @@ impl Default for BorderAtariObsRawFilterConfig { } } -/// A filter without any processing. +/// A filter that performs no processing. pub struct BorderAtariObsRawFilter { phantom: PhantomData, } diff --git a/border-atari-env/src/util.rs b/border-atari-env/src/util.rs index 7b788c20..8788d89b 100644 --- a/border-atari-env/src/util.rs +++ b/border-atari-env/src/util.rs @@ -1 +1,2 @@ +//! Utility functions for testing. pub mod test; diff --git a/border-candle-agent/src/dqn/base.rs b/border-candle-agent/src/dqn/base.rs index a7f2d7fb..f67055b0 100644 --- a/border-candle-agent/src/dqn/base.rs +++ b/border-candle-agent/src/dqn/base.rs @@ -1,4 +1,4 @@ -//! DQN agent implemented with tch-rs. +//! DQN agent implemented with candle. 
use super::{config::DqnConfig, explorer::DqnExplorer, model::DqnModel}; use crate::{ model::SubModel1, @@ -17,7 +17,7 @@ use std::convert::TryFrom; use std::{fs, marker::PhantomData, path::Path}; #[allow(clippy::upper_case_acronyms, dead_code)] -/// DQN agent implemented with tch-rs. +/// DQN agent implemented with candle. pub struct Dqn where Q: SubModel1, @@ -330,6 +330,10 @@ where record } + /// Save model parameters in the given directory. + /// + /// The parameters of the model are saved as `qnet.pt`. + /// The parameters of the target model are saved as `qnet_tgt.pt`. fn save_params>(&self, path: T) -> Result<()> { // TODO: consider to rename the path if it already exists fs::create_dir_all(&path)?; diff --git a/border-candle-agent/src/dqn/model.rs b/border-candle-agent/src/dqn/model.rs index c14f888d..90efee8b 100644 --- a/border-candle-agent/src/dqn/model.rs +++ b/border-candle-agent/src/dqn/model.rs @@ -79,6 +79,12 @@ where } } +/// Action value function model for DQN. +/// +/// The architecture of the model is defined by the type parameter `Q`, +/// which should implement [`SubModel1`]. +/// This takes [`SubModel1::Input`] as input and outputs a tensor. +/// The output tensor should have the same dimension as the number of actions. pub struct DqnModel where Q: SubModel1, diff --git a/border-candle-agent/src/lib.rs b/border-candle-agent/src/lib.rs index 2ae29440..9809a488 100644 --- a/border-candle-agent/src/lib.rs +++ b/border-candle-agent/src/lib.rs @@ -16,6 +16,8 @@ pub use tensor_batch::{TensorBatch, ZeroTensor}; /// Device for using candle. /// /// This enum is added because [`candle_core::Device`] does not support serialization. +/// +/// [`candle_core::Device`]: https://docs.rs/candle-core/0.4.1/candle_core/enum.Device.html pub enum Device { /// The main CPU device. Cpu, diff --git a/border-candle-agent/src/model.rs b/border-candle-agent/src/model.rs index 43f245c4..6fbacc6a 100644 --- a/border-candle-agent/src/model.rs +++ b/border-candle-agent/src/model.rs @@ -7,7 +7,7 @@ use candle_nn::VarBuilder; /// Neural network model not owing its [`VarMap`] internally. /// -/// [`VarMap`]: candle_nn::VarMap +/// [`VarMap`]: https://docs.rs/candle-nn/0.4.1/candle_nn/var_map/struct.VarMap.html pub trait SubModel1 { /// Configuration from which [`SubModel1`] is constructed. type Config; @@ -19,6 +19,8 @@ pub trait SubModel1 { type Output; /// Builds [`SubModel1`] with [`VarBuilder`] and [`SubModel1::Config`]. + /// + /// [`VarBuilder`]: https://docs.rs/candle-nn/0.4.1/candle_nn/var_builder/type.VarBuilder.html fn build(vb: VarBuilder, config: Self::Config) -> Self; /// A generalized forward function. @@ -29,7 +31,7 @@ pub trait SubModel1 { /// /// The difference from [`SubModel1`] is that this trait takes two inputs. /// -/// [`VarMap`]: candle_nn::VarMap +/// [`VarMap`]: https://docs.rs/candle-nn/0.4.1/candle_nn/var_map/struct.VarMap.html pub trait SubModel2 { /// Configuration from which [`SubModel2`] is constructed. type Config; diff --git a/border-candle-agent/src/opt.rs b/border-candle-agent/src/opt.rs index 48fe7cd4..0dff9522 100644 --- a/border-candle-agent/src/opt.rs +++ b/border-candle-agent/src/opt.rs @@ -113,6 +113,8 @@ impl Default for OptimizerConfig { /// Optimizers. /// /// This is a thin wrapper of [`candle_nn::optim::Optimizer`]. +/// +/// [`candle_nn::optim::Optimizer`]: https://docs.rs/candle-nn/0.4.1/candle_nn/optim/trait.Optimizer.html pub enum Optimizer { /// Adam optimizer. 
AdamW(AdamW), diff --git a/border-candle-agent/src/sac.rs b/border-candle-agent/src/sac.rs index bd2b31ea..89164dfd 100644 --- a/border-candle-agent/src/sac.rs +++ b/border-candle-agent/src/sac.rs @@ -1,10 +1,156 @@ //! SAC agent. //! -//! Here is an example in `border/examples/sac_pendulum.rs` +//! Here is an example of creating SAC agent: //! -//! ```rust,ignore +//! ```no_run +//! # use anyhow::Result; +//! use border_core::{ +//! # Env as Env_, Obs as Obs_, Act as Act_, Step, test::{ +//! # TestAct as TestAct_, TestActBatch as TestActBatch_, +//! # TestEnv as TestEnv_, +//! # TestObs as TestObs_, TestObsBatch as TestObsBatch_, +//! # }, +//! # record::Record, +//! # generic_replay_buffer::{SimpleReplayBuffer, BatchBase}, +//! Configurable, +//! }; +//! use border_candle_agent::{ +//! sac::{ActorConfig, CriticConfig, Sac, SacConfig}, +//! mlp::{Mlp, Mlp2, MlpConfig}, +//! opt::OptimizerConfig +//! }; +//! +//! # struct TestEnv(TestEnv_); +//! # #[derive(Clone, Debug)] +//! # struct TestObs(TestObs_); +//! # #[derive(Clone, Debug)] +//! # struct TestAct(TestAct_); +//! # struct TestObsBatch(TestObsBatch_); +//! # struct TestActBatch(TestActBatch_); +//! # +//! # impl Obs_ for TestObs { +//! # fn dummy(n: usize) -> Self { +//! # Self(TestObs_::dummy(n)) +//! # } +//! # +//! # fn len(&self) -> usize { +//! # self.0.len() +//! # } +//! # } +//! # +//! # impl Into for TestObs { +//! # fn into(self) -> candle_core::Tensor { +//! # unimplemented!(); +//! # } +//! # } +//! # +//! # impl BatchBase for TestObsBatch { +//! # fn new(n: usize) -> Self { +//! # Self(TestObsBatch_::new(n)) +//! # } +//! # +//! # fn push(&mut self, ix: usize, data: Self) { +//! # self.0.push(ix, data.0); +//! # } +//! # +//! # fn sample(&self, ixs: &Vec) -> Self { +//! # Self(self.0.sample(ixs)) +//! # } +//! # } +//! # +//! # impl BatchBase for TestActBatch { +//! # fn new(n: usize) -> Self { +//! # Self(TestActBatch_::new(n)) +//! # } +//! # +//! # fn push(&mut self, ix: usize, data: Self) { +//! # self.0.push(ix, data.0); +//! # } +//! # +//! # fn sample(&self, ixs: &Vec) -> Self { +//! # Self(self.0.sample(ixs)) +//! # } +//! # } +//! # +//! # impl Act_ for TestAct { +//! # fn len(&self) -> usize { +//! # self.0.len() +//! # } +//! # } +//! # +//! # impl From for TestAct { +//! # fn from(t: candle_core::Tensor) -> Self { +//! # unimplemented!(); +//! # } +//! # } +//! # +//! # impl Into for TestAct { +//! # fn into(self) -> candle_core::Tensor { +//! # unimplemented!(); +//! # } +//! # } +//! # +//! # impl Env_ for TestEnv { +//! # type Config = ::Config; +//! # type Obs = TestObs; +//! # type Act = TestAct; +//! # type Info = ::Info; +//! # +//! # fn build(config: &Self::Config, seed: i64) -> Result { +//! # Ok(Self(TestEnv_::build(&config, seed).unwrap())) +//! # } +//! # +//! # fn step(&mut self, act: &TestAct) -> (Step, Record) { +//! # let (step, record) = self.0.step(&act.0); +//! # let step = Step { +//! # obs: TestObs(step.obs), +//! # act: TestAct(step.act), +//! # reward: step.reward, +//! # is_terminated: step.is_terminated, +//! # is_truncated: step.is_truncated, +//! # info: step.info, +//! # init_obs: TestObs(step.init_obs), +//! # }; +//! # (step, record) +//! # } +//! # +//! # fn reset(&mut self, is_done: Option<&Vec>) -> Result { +//! # Ok(TestObs(self.0.reset(is_done).unwrap())) +//! # } +//! # +//! # fn step_with_reset(&mut self, a: &TestAct) -> (Step, Record) { +//! # let (step, record) = self.0.step_with_reset(&a.0); +//! # let step = Step { +//! # obs: TestObs(step.obs), +//! 
# act: TestAct(step.act), +//! # reward: step.reward, +//! # is_terminated: step.is_terminated, +//! # is_truncated: step.is_truncated, +//! # info: step.info, +//! # init_obs: TestObs(step.init_obs), +//! # }; +//! # (step, record) +//! # } +//! # +//! # fn reset_with_index(&mut self, ix: usize) -> Result { +//! # Ok(TestObs(self.0.reset_with_index(ix).unwrap())) +//! # } +//! # } +//! # +//! # type Env = TestEnv; +//! # type ObsBatch = TestObsBatch; +//! # type ActBatch = TestActBatch; +//! # type ReplayBuffer = SimpleReplayBuffer; +//! # +//! const DIM_OBS: i64 = 3; +//! const DIM_ACT: i64 = 1; +//! const LR_ACTOR: f64 = 1e-3; +//! const LR_CRITIC: f64 = 1e-3; +//! const BATCH_SIZE: usize = 256; +//! //! fn create_agent(in_dim: i64, out_dim: i64) -> Sac { -//! let device = tch::Device::cuda_if_available(); +//! let device = candle_core::Device::cuda_if_available(0).unwrap(); +//! //! let actor_config = ActorConfig::default() //! .opt_config(OptimizerConfig::Adam { lr: LR_ACTOR }) //! .out_dim(out_dim) @@ -12,25 +158,13 @@ //! let critic_config = CriticConfig::default() //! .opt_config(OptimizerConfig::Adam { lr: LR_CRITIC }) //! .q_config(MlpConfig::new(in_dim + out_dim, vec![64, 64], 1, true)); -//! let sac_config = SacConfig::default() +//! let sac_config = SacConfig::::default() //! .batch_size(BATCH_SIZE) -//! .min_transitions_warmup(N_TRANSITIONS_WARMUP) //! .actor_config(actor_config) //! .critic_config(critic_config) //! .device(device); //! Sac::build(sac_config) //! } -//! -//! fn train(max_opts: usize, model_dir: &str, eval_interval: usize) -> Result<()> { -//! let trainer = //... -//! let mut agent = create_agent(DIM_OBS, DIM_ACT); -//! let mut recorder = TensorboardRecorder::new(model_dir); -//! let mut evaluator = Evaluator::new(&env_config(), 0, N_EPISODES_PER_EVAL)?; -//! -//! trainer.train(&mut agent, &mut recorder, &mut evaluator)?; -//! -//! Ok(()) -//! } //! ``` mod actor; mod base; diff --git a/border-candle-agent/src/sac/config.rs b/border-candle-agent/src/sac/config.rs index b789090f..3256740f 100644 --- a/border-candle-agent/src/sac/config.rs +++ b/border-candle-agent/src/sac/config.rs @@ -18,7 +18,7 @@ use std::{ path::Path, }; -/// Constructs [`Sac`](super::Sac). +/// Configuration of [`Sac`](super::Sac). #[allow(clippy::upper_case_acronyms)] #[derive(Debug, Deserialize, Serialize, PartialEq)] pub struct SacConfig diff --git a/border-candle-agent/src/tensor_batch.rs b/border-candle-agent/src/tensor_batch.rs index 21c031bb..410ac023 100644 --- a/border-candle-agent/src/tensor_batch.rs +++ b/border-candle-agent/src/tensor_batch.rs @@ -1,7 +1,9 @@ use border_core::generic_replay_buffer::BatchBase; use candle_core::{error::Result, DType, Device, Tensor}; -/// Adds capability of constructing [Tensor] with a static method. +/// Adds capability of constructing [`Tensor`] with a static method. +/// +/// [`Tensor`]: https://docs.rs/candle-core/0.4.1/candle_core/struct.Tensor.html pub trait ZeroTensor { /// Constructs zero tensor. fn zeros(shape: &[usize]) -> Result; @@ -28,6 +30,8 @@ impl ZeroTensor for i64 { /// A buffer consisting of a [`Tensor`]. /// /// The internal buffer is `Vec`. 
+/// +/// [`Tensor`]: https://docs.rs/candle-core/0.4.1/candle_core/struct.Tensor.html #[derive(Clone, Debug)] pub struct TensorBatch { buf: Vec, diff --git a/border-candle-agent/src/util.rs b/border-candle-agent/src/util.rs index dad828f6..1e967546 100644 --- a/border-candle-agent/src/util.rs +++ b/border-candle-agent/src/util.rs @@ -23,27 +23,10 @@ pub enum CriticLoss { SmoothL1, } -// /// Apply soft update on a model. -// /// -// /// Variables are identified by their names. -// pub fn track(dest: &mut M, src: &mut M, tau: f64) { -// let src = &mut src.get_var_store().variables(); -// let dest = &mut dest.get_var_store().variables(); -// debug_assert_eq!(src.len(), dest.len()); - -// let names = src.keys(); -// tch::no_grad(|| { -// for name in names { -// let src = src.get(name).unwrap(); -// let dest = dest.get_mut(name).unwrap(); -// dest.copy_(&(tau * src + (1.0 - tau) * &*dest)); -// } -// }); -// trace!("soft update"); -// } - -/// Apply soft update on model parameters. +/// Apply soft update on variables. /// +/// Variables are identified by their names. +/// /// dest = tau * src + (1.0 - tau) * dest pub fn track(dest: &VarMap, src: &VarMap, tau: f64) -> Result<()> { trace!("dest"); @@ -69,6 +52,7 @@ pub fn track(dest: &VarMap, src: &VarMap, tau: f64) -> Result<()> { // v // } +/// Interface for handling output dimensions. pub trait OutDim { /// Returns the output dimension. fn get_out_dim(&self) -> i64; @@ -141,6 +125,7 @@ pub fn smooth_l1_loss(x: &Tensor, y: &Tensor) -> Result f32 { t.broadcast_sub(&t.mean_all().unwrap()) .unwrap() @@ -154,6 +139,7 @@ pub fn std(t: &Tensor) -> f32 { .unwrap() } +/// Returns the mean and standard deviation of the parameters. pub fn param_stats(varmap: &VarMap) -> Record { let mut record = Record::empty(); diff --git a/border-core/src/lib.rs b/border-core/src/lib.rs index c14d4a8a..0f8efd09 100644 --- a/border-core/src/lib.rs +++ b/border-core/src/lib.rs @@ -4,26 +4,25 @@ //! # Observation and action //! //! [`Obs`] and [`Act`] traits are abstractions of observation and action in environments. -//! These traits can handle two or more samples for implementing vectorized environments. +//! These traits can handle two or more samples for implementing vectorized environments, +//! although there is currently no implementation of a vectorized environment. //! //! # Environment //! //! [`Env`] trait is an abstraction of environments. It has four associated types: //! `Config`, `Obs`, `Act` and `Info`. `Obs` and `Act` are concrete types of //! observation and action of the environment. -//! These must implement [`Obs`] and [`Act`] traits, respectively. +//! These types must implement [`Obs`] and [`Act`] traits, respectively. //! The environment that implements [`Env`] generates [`Step`] object //! at every environment interaction step with [`Env::step()`] method. -//! -//! `Info` stores some information at every step of interactions of an agent and +//! [`Info`] stores some information at every interaction step between an agent and //! the environment. It could be empty (zero-sized struct). `Config` represents //! configurations of the environment and is used to build. //! //! # Policy //! -//! [`Policy`] represents a policy, from which actions are sampled for -//! environment `E`. [`Policy::sample()`] takes `E::Obs` and emits `E::Act`. -//! It could be probabilistic or deterministic. +//! [`Policy`] represents a policy. [`Policy::sample()`] takes `E::Obs` and +//! generates `E::Act`. It could be probabilistic or deterministic. //! //! # Agent //!
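To make the `Obs` and `Act` abstractions above concrete, here is a minimal sketch mirroring the `TestObs`/`TestAct` types that this patch adds in `border_core::test`; the scalar payload is illustrative:

```rust
use border_core::{Act, Obs};

/// A scalar observation holding a single sample (not vectorized).
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct MyObs {
    value: f32,
}

impl Obs for MyObs {
    /// A placeholder observation, e.g. used before the first reset.
    fn dummy(_n: usize) -> Self {
        Self { value: 0.0 }
    }

    /// The number of samples held in this observation.
    fn len(&self) -> usize {
        1
    }
}

/// A scalar action; as with `TestAct` in the new test module, no methods
/// beyond the `Act` defaults are required.
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct MyAct {
    value: f32,
}

impl Act for MyAct {}
```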
@@ -32,34 +31,36 @@ //! the agent's policy might be probabilistic for exploration, while in evaluation mode, //! the policy might be deterministic. //! -//! [`Agent::opt()`] method does a single optimization step. The definition of an -//! optimization step depends on each agent. It might be multiple stochastic gradient +//! The [`Agent::opt()`] method performs a single optimization step. The definition of an +//! optimization step varies for each agent. It might be multiple stochastic gradient //! steps in an optimization step. Samples for training are taken from //! [`R: ReplayBufferBase`][`ReplayBufferBase`]. //! -//! This trait also has methods for saving/loading the trained policy -//! in the given directory. +//! This trait also has methods for saving/loading parameters of the trained policy +//! in a directory. //! //! # Batch //! //! [`TransitionBatch`] is a trait of a batch of transitions `(o_t, r_t, a_t, o_t+1)`. -//! This is used to train [`Agent`]s with an RL algorithm. +//! This trait is used to train [`Agent`]s using an RL algorithm. //! -//! # Replay buffer +//! # Replay buffer and experience buffer //! -//! [`ReplayBufferBase`] trait is an abstraction of replay buffers. For handling samples, -//! there are two associated types: `Item` and `Batch`. `Item` is a type -//! representing samples pushed to the buffer. These samples might be generated from -//! [`Step`]. [`StepProcessor`] trait provides the interface -//! for converting [`Step`] into `Item`. +//! The [`ReplayBufferBase`] trait is an abstraction of replay buffers. +//! Its associated type [`ReplayBufferBase::Batch`] represents samples taken from +//! the buffer for training [`Agent`]s. Agents must implement the [`Agent::opt()`] method, +//! in which [`ReplayBufferBase::Batch`] has appropriate trait bound(s) for training +//! the agent. //! -//! `Batch` is a type of samples taken from the buffer for training [`Agent`]s. -//! The user implements [`Agent::opt()`] method such that it handles `Batch` objects -//! for doing an optimization step. +//! As explained above, the [`ReplayBufferBase`] trait is able to generate batches +//! of samples with which agents are trained. On the other hand, the [`ExperienceBufferBase`] +//! trait is able to store samples. [`ExperienceBufferBase::push()`] is used to push +//! samples of type [`ExperienceBufferBase::Item`], which might be obtained via interaction +//! steps with an environment. //! //! ## A reference implementation //! -//! [`SimpleReplayBuffer`] implementats [`ReplayBufferBase`]. +//! [`SimpleReplayBuffer`] implements both [`ReplayBufferBase`] and [`ExperienceBufferBase`]. //! This type has two parameters `O` and `A`, which are representation of //! observation and action in the replay buffer. `O` and `A` must implement //! [`BatchBase`], which has the functionality of storing samples, like `Vec`, @@ -74,10 +75,12 @@ //! # Trainer //! //! [`Trainer`] manages training loop and related objects. The [`Trainer`] object is -//! built with configurations of [`Env`], [`ReplayBufferBase`], [`StepProcessor`] -//! and some training parameters. Then, [`Trainer::train`] method starts training loop with -//! given [`Agent`] and [`Recorder`](crate::record::Recorder). -//! +//! built with configurations of training parameters such as the maximum number of +//! optimization steps and the model directory in which parameters of the agent are saved +//! during training. +//! The [`Trainer::train`] method executes online training of an agent in an environment.
+//! In the training loop of this method, the agent interacts with the environment to +//! take samples and perform optimization steps. Some metrics are recorded at the same time. +//! //! [`SimpleReplayBuffer`]: replay_buffer::SimpleReplayBuffer //! [`SimpleReplayBuffer`]: generic_replay_buffer::SimpleReplayBuffer //! [`BatchBase`]: generic_replay_buffer::BatchBase @@ -98,3 +101,214 @@ pub use base::{ mod trainer; pub use evaluator::{DefaultEvaluator, Evaluator}; pub use trainer::{Sampler, Trainer, TrainerConfig}; + +// TODO: Consider compiling this module only for tests. +/// Agent and Env for testing. +pub mod test { + use serde::{Deserialize, Serialize}; + + /// Obs for testing. + #[derive(Clone, Debug)] + pub struct TestObs { + obs: usize, + } + + impl crate::Obs for TestObs { + fn dummy(_n: usize) -> Self { + Self { obs: 0 } + } + + fn len(&self) -> usize { + 1 + } + } + + /// Batch of obs for testing. + pub struct TestObsBatch { + obs: Vec<usize>, + } + + impl crate::generic_replay_buffer::BatchBase for TestObsBatch { + fn new(capacity: usize) -> Self { + Self { + obs: vec![0; capacity], + } + } + + fn push(&mut self, i: usize, data: Self) { + self.obs[i] = data.obs[0]; + } + + fn sample(&self, ixs: &Vec<usize>) -> Self { + let obs = ixs.iter().map(|ix| self.obs[*ix]).collect(); + Self { obs } + } + } + + impl From<TestObs> for TestObsBatch { + fn from(obs: TestObs) -> Self { + Self { obs: vec![obs.obs] } + } + } + + /// Act for testing. + #[derive(Clone, Debug)] + pub struct TestAct { + act: usize, + } + + impl crate::Act for TestAct {} + + /// Batch of act for testing. + pub struct TestActBatch { + act: Vec<usize>, + } + + impl From<TestAct> for TestActBatch { + fn from(act: TestAct) -> Self { + Self { act: vec![act.act] } + } + } + + impl crate::generic_replay_buffer::BatchBase for TestActBatch { + fn new(capacity: usize) -> Self { + Self { + act: vec![0; capacity], + } + } + + fn push(&mut self, i: usize, data: Self) { + self.act[i] = data.act[0]; + } + + fn sample(&self, ixs: &Vec<usize>) -> Self { + let act = ixs.iter().map(|ix| self.act[*ix]).collect(); + Self { act } + } + } + + /// Info for testing. + pub struct TestInfo {} + + impl crate::Info for TestInfo {} + + /// Environment for testing.
+ pub struct TestEnv { + state_init: usize, + state: usize, + } + + impl crate::Env for TestEnv { + type Config = usize; + type Obs = TestObs; + type Act = TestAct; + type Info = TestInfo; + + fn reset(&mut self, _is_done: Option<&Vec>) -> anyhow::Result { + self.state = self.state_init; + Ok(TestObs { obs: self.state }) + } + + fn reset_with_index(&mut self, _ix: usize) -> anyhow::Result { + self.state = self.state_init; + Ok(TestObs { obs: self.state }) + } + + fn step_with_reset( + &mut self, + a: &Self::Act, + ) -> (crate::Step, crate::record::Record) + where + Self: Sized, + { + self.state = self.state + a.act; + let step = crate::Step { + obs: TestObs { obs: self.state }, + act: a.clone(), + reward: vec![0.0], + is_terminated: vec![0], + is_truncated: vec![0], + info: TestInfo {}, + init_obs: TestObs { + obs: self.state_init, + }, + }; + return (step, crate::record::Record::empty()); + } + + fn step(&mut self, a: &Self::Act) -> (crate::Step, crate::record::Record) + where + Self: Sized, + { + self.state = self.state + a.act; + let step = crate::Step { + obs: TestObs { obs: self.state }, + act: a.clone(), + reward: vec![0.0], + is_terminated: vec![0], + is_truncated: vec![0], + info: TestInfo {}, + init_obs: TestObs { + obs: self.state_init, + }, + }; + return (step, crate::record::Record::empty()); + } + + fn build(config: &Self::Config, _seed: i64) -> anyhow::Result + where + Self: Sized, + { + Ok(Self { + state_init: *config, + state: 0, + }) + } + } + + type ReplayBuffer = + crate::generic_replay_buffer::SimpleReplayBuffer; + + /// Agent for testing. + pub struct TestAgent {} + + #[derive(Clone, Deserialize, Serialize)] + /// Config of agent for testing. + pub struct TestAgentConfig; + + impl crate::Agent for TestAgent { + fn train(&mut self) {} + + fn is_train(&self) -> bool { + false + } + + fn eval(&mut self) {} + + fn opt_with_record(&mut self, _buffer: &mut ReplayBuffer) -> crate::record::Record { + crate::record::Record::empty() + } + + fn save_params>(&self, _path: T) -> anyhow::Result<()> { + Ok(()) + } + + fn load_params>(&mut self, _path: T) -> anyhow::Result<()> { + Ok(()) + } + } + + impl crate::Policy for TestAgent { + fn sample(&mut self, _obs: &TestObs) -> TestAct { + TestAct { act: 1 } + } + } + + impl crate::Configurable for TestAgent { + type Config = TestAgentConfig; + + fn build(_config: Self::Config) -> Self { + Self {} + } + } +} diff --git a/border-derive/Cargo.toml b/border-derive/Cargo.toml index 53ba7fff..70a4a40f 100644 --- a/border-derive/Cargo.toml +++ b/border-derive/Cargo.toml @@ -25,16 +25,10 @@ border-tch-agent = { version = "0.0.7", path = "../border-tch-agent" } border-candle-agent = { version = "0.0.7", path = "../border-candle-agent" } border-py-gym-env = { version = "0.0.7", path = "../border-py-gym-env" } border-core = { version = "0.0.7", path = "../border-core" } +border-atari-env = { version = "0.0.7", path = "../border-atari-env" } ndarray = { workspace = true } tch = { workspace = true } candle-core = { workspace = true } -# [features] -# default = ["tch"] - -[[example]] -name = "test1" -required-features = ["tch"] - [package.metadata.docs.rs] features = ["doc-only"] diff --git a/border-derive/examples/border_atari_act.rs b/border-derive/examples/border_atari_act.rs new file mode 100644 index 00000000..4f7b002a --- /dev/null +++ b/border-derive/examples/border_atari_act.rs @@ -0,0 +1,8 @@ +use border_atari_env::BorderAtariAct; +use border_derive::Act; + +#[allow(dead_code)] +#[derive(Clone, Debug, Act)] +struct MyAct(BorderAtariAct); + 
+fn main() {} diff --git a/border-derive/examples/border_gym_cont_act.rs b/border-derive/examples/border_gym_cont_act.rs new file mode 100644 index 00000000..9015aca0 --- /dev/null +++ b/border-derive/examples/border_gym_cont_act.rs @@ -0,0 +1,8 @@ +use border_derive::Act; +use border_py_gym_env::GymContinuousAct; + +#[allow(dead_code)] +#[derive(Clone, Debug, Act)] +struct MyAct(GymContinuousAct); + +fn main() {} diff --git a/border-derive/examples/border_gym_disc_act.rs b/border-derive/examples/border_gym_disc_act.rs new file mode 100644 index 00000000..05d0ea07 --- /dev/null +++ b/border-derive/examples/border_gym_disc_act.rs @@ -0,0 +1,8 @@ +use border_derive::Act; +use border_py_gym_env::GymDiscreteAct; + +#[allow(dead_code)] +#[derive(Clone, Debug, Act)] +struct MyAct(GymDiscreteAct); + +fn main() {} diff --git a/border-derive/examples/border_tensor_batch.rs b/border-derive/examples/border_tensor_batch.rs new file mode 100644 index 00000000..697e7b32 --- /dev/null +++ b/border-derive/examples/border_tensor_batch.rs @@ -0,0 +1,8 @@ +use border_derive::BatchBase; +use border_tch_agent::TensorBatch; + +#[allow(dead_code)] +#[derive(Clone, BatchBase)] +pub struct ObsBatch(TensorBatch); + +fn main() {} diff --git a/border-derive/examples/test1.rs b/border-derive/examples/test1.rs deleted file mode 100644 index 3551ff5a..00000000 --- a/border-derive/examples/test1.rs +++ /dev/null @@ -1,40 +0,0 @@ -use border_derive::{Act, SubBatch}; -use border_py_gym_env::GymDiscreteAct; -use border_tch_agent::TensorBatch; -use ndarray::ArrayD; -use std::convert::TryFrom; -use tch::Tensor; - -#[derive(Debug, Clone)] -struct Obs(ArrayD); - -#[derive(SubBatch)] -struct ObsBatch(TensorBatch); - -impl From for Tensor { - fn from(value: Obs) -> Self { - Tensor::try_from(&value.0).unwrap() - } -} - -impl From for ObsBatch { - fn from(obs: Obs) -> Self { - let tensor = obs.into(); - Self(TensorBatch::from_tensor(tensor)) - } -} - -#[derive(Clone, Debug, Act)] -struct Act(GymDiscreteAct); - -#[derive(SubBatch)] -struct ActBatch(TensorBatch); - -impl From for ActBatch { - fn from(act: Act) -> Self { - let tensor = act.into(); - Self(TensorBatch::from_tensor(tensor)) - } -} - -fn main() {} diff --git a/border-derive/src/act.rs b/border-derive/src/act.rs index f18566f3..1ddf9ea5 100644 --- a/border-derive/src/act.rs +++ b/border-derive/src/act.rs @@ -56,7 +56,8 @@ fn py_gym_env_cont_act( .iter() .map(|x| *x as usize) .collect::>(); - let act: Vec = t.into(); + use std::convert::TryInto; + let act: Vec = t.try_into().unwrap(); let act = ndarray::Array1::::from(act).into_shape(ndarray::IxDyn(&shape)).unwrap(); @@ -121,7 +122,8 @@ fn py_gym_env_disc_act( impl From for #ident { fn from(t: tch::Tensor) -> Self { - let data: Vec = t.into(); + use std::convert::TryInto; + let data: Vec = t.try_into().unwrap(); let data: Vec<_> = data.iter().map(|e| *e as i32).collect(); #ident(GymDiscreteAct::new(data)) } diff --git a/border-derive/src/lib.rs b/border-derive/src/lib.rs index 020fc23d..e5874c76 100644 --- a/border-derive/src/lib.rs +++ b/border-derive/src/lib.rs @@ -1,9 +1,207 @@ -//! Derive macros for making newtypes of types that implements -//! `border_core::Obs`, `border_core::Act` and -//! `order_core::replay_buffer::SubBatch`. +//! Derive macros for implementing [`border_core::Act`] and +//! [`border_core::generic_replay_buffer::BatchBase`]. //! -//! These macros will implements some conversion traits for combining -//! interfaces of an environment and an agent. +//! # Examples +//! +//! 
+//! ## Newtype for [`BorderAtariAct`]
+//!
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_atari_env::BorderAtariAct;
+//! #
+//! #[derive(Clone, Debug, Act)]
+//! struct MyAct(BorderAtariAct);
+//! ```
+//!
+//! The above code will generate the following implementation:
+//!
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_atari_env::BorderAtariAct;
+//! #
+//! #[derive(Clone, Debug)]
+//! struct MyAct(BorderAtariAct);
+//! impl border_core::Act for MyAct {
+//!     fn len(&self) -> usize {
+//!         self.0.len()
+//!     }
+//! }
+//! impl Into<BorderAtariAct> for MyAct {
+//!     fn into(self) -> BorderAtariAct {
+//!         self.0
+//!     }
+//! }
+//! /// The following code is generated when features="tch" is enabled.
+//! impl From<MyAct> for tch::Tensor {
+//!     fn from(act: MyAct) -> tch::Tensor {
+//!         let v = vec![act.0.act as i64];
+//!         let t: tch::Tensor = std::convert::TryFrom::<Vec<i64>>::try_from(v).unwrap();
+//!         t.unsqueeze(0)
+//!     }
+//! }
+//! impl From<tch::Tensor> for MyAct {
+//!     fn from(t: tch::Tensor) -> Self {
+//!         let data: Vec<i64> = {
+//!             let t = t.to_dtype(tch::Kind::Int64, false, true);
+//!             let n = t.numel();
+//!             let mut data = vec![0i64; n];
+//!             t.f_copy_data(&mut data, n).unwrap();
+//!             data
+//!         };
+//!         MyAct(BorderAtariAct::new(data[0] as u8))
+//!     }
+//! }
+//! ```
+//!
+//! ## Newtype for [`GymContinuousAct`]
+//!
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_py_gym_env::GymContinuousAct;
+//! #
+//! #[derive(Clone, Debug, Act)]
+//! struct MyAct(GymContinuousAct);
+//! ```
+//!
+//! The above code will generate the following implementation:
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_py_gym_env::GymContinuousAct;
+//! #
+//! #[derive(Clone, Debug)]
+//! struct MyAct(GymContinuousAct);
+//! impl border_core::Act for MyAct {
+//!     fn len(&self) -> usize {
+//!         self.0.len()
+//!     }
+//! }
+//! impl Into<GymContinuousAct> for MyAct {
+//!     fn into(self) -> GymContinuousAct {
+//!         self.0
+//!     }
+//! }
+//! /// The following code is generated when features="tch" is enabled.
+//! impl From<MyAct> for tch::Tensor {
+//!     fn from(act: MyAct) -> tch::Tensor {
+//!         let v = act.0.act.iter().map(|e| *e as f32).collect::<Vec<f32>>();
+//!         let t: tch::Tensor = std::convert::TryFrom::<Vec<f32>>::try_from(v).unwrap();
+//!         t.unsqueeze(0)
+//!     }
+//! }
+//! impl From<tch::Tensor> for MyAct {
+//!     /// `t` must be a 1-dimensional tensor of `f32`.
+//!     fn from(t: tch::Tensor) -> Self {
+//!         let shape = t.size()[1..].iter().map(|x| *x as usize).collect::<Vec<usize>>();
+//!         use std::convert::TryInto;
+//!         let act: Vec<f32> = t.try_into().unwrap();
+//!         let act = ndarray::Array1::<f32>::from(act)
+//!             .into_shape(ndarray::IxDyn(&shape))
+//!             .unwrap();
+//!         MyAct(GymContinuousAct::new(act))
+//!     }
+//! }
+//! ```
+//!
+//! ## Newtype for [`GymDiscreteAct`]
+//!
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_py_gym_env::GymDiscreteAct;
+//! #
+//! #[derive(Clone, Debug, Act)]
+//! struct MyAct(GymDiscreteAct);
+//! ```
+//!
+//! The above code will generate the following implementation:
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_py_gym_env::GymDiscreteAct;
+//! #
+//! #[derive(Clone, Debug)]
+//! struct MyAct(GymDiscreteAct);
+//! impl border_core::Act for MyAct {
+//!     fn len(&self) -> usize {
+//!         self.0.len()
+//!     }
+//! }
+//! impl Into<GymDiscreteAct> for MyAct {
+//!     fn into(self) -> GymDiscreteAct {
+//!         self.0
+//!     }
+//! }
+//! impl From<MyAct> for tch::Tensor {
+//!     fn from(act: MyAct) -> tch::Tensor {
+//!         let v = act.0.act.iter().map(|e| *e as i64).collect::<Vec<i64>>();
+//!         let t: tch::Tensor = std::convert::TryFrom::<Vec<i64>>::try_from(v).unwrap();
+//!         t.unsqueeze(0)
+//!     }
+//! }
+//! impl From<tch::Tensor> for MyAct {
+//!     fn from(t: tch::Tensor) -> Self {
+//!         use std::convert::TryInto;
+//!         let data: Vec<i64> = t.try_into().unwrap();
+//!         let data: Vec<_> = data.iter().map(|e| *e as i32).collect();
+//!         MyAct(GymDiscreteAct::new(data))
+//!     }
+//! }
+//! ```
+//!
+//! ## Newtype for [`TensorBatch`]
+//!
+//! ```
+//! # use border_derive::BatchBase;
+//! # use border_tch_agent::TensorBatch;
+//! #
+//! #[derive(Clone, BatchBase)]
+//! struct MyBatch(TensorBatch);
+//! ```
+//!
+//! The above code will generate the following implementation:
+//!
+//! ```
+//! # use border_derive::BatchBase;
+//! # use border_tch_agent::TensorBatch;
+//! #
+//! #[derive(Clone)]
+//! struct MyBatch(TensorBatch);
+//! impl border_core::generic_replay_buffer::BatchBase for MyBatch {
+//!     fn new(capacity: usize) -> Self {
+//!         Self(TensorBatch::new(capacity))
+//!     }
+//!     fn push(&mut self, i: usize, data: Self) {
+//!         self.0.push(i, data.0)
+//!     }
+//!     fn sample(&self, ixs: &Vec<usize>) -> Self {
+//!         let buf = self.0.sample(ixs);
+//!         Self(buf)
+//!     }
+//! }
+//! impl From<TensorBatch> for MyBatch {
+//!     fn from(obs: TensorBatch) -> Self {
+//!         MyBatch(obs)
+//!     }
+//! }
+//! impl From<MyBatch> for tch::Tensor {
+//!     fn from(b: MyBatch) -> Self {
+//!         b.0.into()
+//!     }
+//! }
+//! ```
+//!
+//! [`border_core::Obs`]: border_core::Obs
+//! [`border_core::Act`]: border_core::Act
+//! [`border_core::generic_replay_buffer::BatchBase`]: border_core::generic_replay_buffer::BatchBase
+//! [`BorderAtariAct`]: border_atari_env::BorderAtariAct
+//! [`GymContinuousAct`]: border_py_gym_env::GymContinuousAct
+//! [`GymDiscreteAct`]: border_py_gym_env::GymDiscreteAct
+//! [`TensorBatch`]: border_tch_agent::TensorBatch
+
 mod act;
 mod obs;
 mod subbatch;
@@ -11,18 +209,19 @@ use proc_macro::{self, TokenStream};
 
 /// Implements `border_core::Obs` for the newtype that wraps
 /// PyGymEnvObs or BorderAtariObs.
+#[deprecated]
 #[proc_macro_derive(Obs, attributes(my_trait))]
 pub fn derive1(input: TokenStream) -> TokenStream {
     obs::derive(input)
 }
 
-/// Implements `border_core::generic_replay_buffer::BatchBase` for the newtype.
+/// Implements [`border_core::generic_replay_buffer::BatchBase`] for the newtype.
 #[proc_macro_derive(BatchBase, attributes(my_trait))]
 pub fn derive2(input: TokenStream) -> TokenStream {
     subbatch::derive(input)
 }
 
-/// Implements `border_core::Act` for the newtype.
+/// Implements [`border_core::Act`] for the newtype.
 #[proc_macro_derive(Act, attributes(my_trait))]
 pub fn derive3(input: TokenStream) -> TokenStream {
     act::derive(input)
diff --git a/border-derive/src/obs.rs b/border-derive/src/obs.rs
index 8b45dda3..a63c4d7c 100644
--- a/border-derive/src/obs.rs
+++ b/border-derive/src/obs.rs
@@ -5,12 +5,11 @@ use syn::{parse_macro_input, DeriveInput};
 
 pub fn derive(input: TokenStream) -> TokenStream {
     let input = parse_macro_input!(input);
-    // let opts = Opts::from_derive_input(&input).expect("Wrong options");
     let DeriveInput { ident, data, ..
    } = input;
 
     let field_type = get_field_type(data);
     let field_type_str = get_type_str(
         field_type.clone(),
-        "The item for deriving Obs must be a new type like MyObs(PyGymEnvObs)",
+        "The item for deriving Obs must be a new type like MyObs(BorderAtariObs)",
     );
 
     // let output = if field_type_str == "PyGymEnvObs" {
diff --git a/border-policy-no-backend/Cargo.toml b/border-policy-no-backend/Cargo.toml
new file mode 100644
index 00000000..53bf671d
--- /dev/null
+++ b/border-policy-no-backend/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "border-policy-no-backend"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
+readme = "README.md"
+
+[dependencies]
+border-core = { version = "0.0.7", path = "../border-core" }
+border-tch-agent = { version = "0.0.7", path = "../border-tch-agent", optional = true }
+serde = { workspace = true, features = ["derive"] }
+log = { workspace = true }
+anyhow = { workspace = true }
+tch = { workspace = true, optional = true }
+
+[dev-dependencies]
+tempdir = { workspace = true }
+tch = { workspace = true }
+
+
+[features]
+border-tch-agent = ["dep:border-tch-agent", "dep:tch"]
diff --git a/border-policy-no-backend/src/lib.rs b/border-policy-no-backend/src/lib.rs
new file mode 100644
index 00000000..93053528
--- /dev/null
+++ b/border-policy-no-backend/src/lib.rs
@@ -0,0 +1,6 @@
+//! Policy with no backend.
+mod mat;
+mod mlp;
+
+pub use mat::Mat;
+pub use mlp::Mlp;
diff --git a/border-policy-no-backend/src/mat.rs b/border-policy-no-backend/src/mat.rs
new file mode 100644
index 00000000..5a429cd8
--- /dev/null
+++ b/border-policy-no-backend/src/mat.rs
@@ -0,0 +1,107 @@
+//! A matrix object.
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
+pub struct Mat {
+    pub data: Vec<f32>,
+    pub shape: Vec<i32>,
+}
+
+#[cfg(feature = "border-tch-agent")]
+impl From<tch::Tensor> for Mat {
+    fn from(x: tch::Tensor) -> Self {
+        let shape: Vec<i32> = x.size().iter().map(|e| *e as i32).collect();
+        let (n, shape) = match shape.len() {
+            1 => (shape[0] as usize, vec![shape[0], 1]),
+            2 => ((shape[0] * shape[1]) as usize, shape),
+            _ => panic!("Invalid matrix size: {:?}", shape),
+        };
+        let mut data: Vec<f32> = vec![0f32; n];
+        x.f_copy_data(&mut data, n).unwrap();
+        Self { data, shape }
+    }
+}
+
+impl Mat {
+    pub fn matmul(&self, x: &Mat) -> Self {
+        let (m, l, n) = (
+            self.shape[0] as usize,
+            self.shape[1] as usize,
+            x.shape[1] as usize,
+        );
+        let mut data = vec![0.0f32; m * n];
+        for i in 0..m {
+            for j in 0..n {
+                let kk = i * n + j;
+                for k in 0..l {
+                    data[kk] += self.data[i * l + k] * x.data[k * n + j];
+                }
+            }
+        }
+
+        Self {
+            shape: vec![m as _, n as _],
+            data,
+        }
+    }
+
+    pub fn add(&self, x: &Mat) -> Self {
+        if self.shape[0] != x.shape[0] || self.shape[1] != x.shape[1] {
+            panic!(
+                "Trying to add matrices of different sizes: {:?}",
+                (&self.shape, &x.shape)
+            );
+        }
+
+        let data = self
+            .data
+            .iter()
+            .zip(x.data.iter())
+            .map(|(a, b)| *a + *b)
+            .collect();
+
+        Mat {
+            data,
+            shape: self.shape.clone(),
+        }
+    }
+
+    pub fn relu(&self) -> Self {
+        let data = self
+            .data
+            .iter()
+            .map(|a| match *a < 0. {
+                true => 0.,
+                false => *a,
+            })
+            .collect();
+
+        Self {
+            data,
+            shape: self.shape.clone(),
+        }
+    }
+
+    pub fn empty() -> Self {
+        Self {
+            data: vec![],
+            shape: vec![0, 0],
+        }
+    }
+
+    pub fn shape(&self) -> &Vec<i32> {
+        &self.shape
+    }
+
+    pub fn new(data: Vec<f32>, shape: Vec<i32>) -> Self {
+        Self { data, shape }
+    }
+}
+
+impl From<Vec<f32>> for Mat {
+    fn from(x: Vec<f32>) -> Self {
+        let shape = vec![x.len() as i32, 1];
+        Self { shape, data: x }
+    }
+}
diff --git a/border-policy-no-backend/src/mlp.rs b/border-policy-no-backend/src/mlp.rs
new file mode 100644
index 00000000..8805a50d
--- /dev/null
+++ b/border-policy-no-backend/src/mlp.rs
@@ -0,0 +1,44 @@
+use crate::Mat;
+use serde::{Deserialize, Serialize};
+
+#[cfg(feature = "border-tch-agent")]
+use tch::nn::VarStore;
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+/// Multilayer perceptron with ReLU activation function.
+pub struct Mlp {
+    /// Weights of layers.
+    ws: Vec<Mat>,
+
+    /// Biases of layers.
+    bs: Vec<Mat>,
+}
+
+impl Mlp {
+    pub fn forward(&self, x: &Mat) -> Mat {
+        let n_layers = self.ws.len();
+        let mut x = x.clone();
+        for i in 0..n_layers {
+            x = self.ws[i].matmul(&x).add(&self.bs[i]);
+            if i != n_layers - 1 {
+                x = x.relu();
+            }
+        }
+        x
+    }
+
+    #[cfg(feature = "border-tch-agent")]
+    pub fn from_varstore(vs: &VarStore, w_names: &[&str], b_names: &[&str]) -> Self {
+        let vars = vs.variables();
+        let ws: Vec<Mat> = w_names
+            .iter()
+            .map(|name| vars[&name.to_string()].copy().into())
+            .collect();
+        let bs: Vec<Mat> = b_names
+            .iter()
+            .map(|name| vars[&name.to_string()].copy().into())
+            .collect();
+
+        Self { ws, bs }
+    }
+}
diff --git a/border-policy-no-backend/tests/test.rs b/border-policy-no-backend/tests/test.rs
new file mode 100644
index 00000000..f1b66b07
--- /dev/null
+++ b/border-policy-no-backend/tests/test.rs
@@ -0,0 +1,24 @@
+use border_policy_no_backend::Mat;
+use tch::Tensor;
+
+#[test]
+fn test_matmul() {
+    let x1 = Tensor::from_slice2(&[&[1.0f32, 2., 3.], &[4., 5., 6.]]);
+    let y1 = Tensor::from_slice(&[7.0f32, 8., 9.]);
+    let z1 = x1.matmul(&y1);
+
+    let x2: Mat = x1.into();
+    let y2: Mat = y1.into();
+    let z2 = x2.matmul(&y2);
+
+    let z3 = {
+        let mut data = vec![0.0f32; 2];
+        z1.f_copy_data(&mut data, 2).unwrap();
+        Mat {
+            shape: vec![2, 1],
+            data,
+        }
+    };
+
+    assert_eq!(z2, z3)
+}
diff --git a/border-py-gym-env/src/act/continuous_filter.rs b/border-py-gym-env/src/act/continuous_filter.rs
index eb0350cf..68251c92 100644
--- a/border-py-gym-env/src/act/continuous_filter.rs
+++ b/border-py-gym-env/src/act/continuous_filter.rs
@@ -22,7 +22,7 @@ impl Default for ContinuousActFilterConfig {
 
 /// Raw filter for continuous actions.
 ///
-/// Type `A` must implements `Into<ArrayD<f32>>`
+/// Type `A` must implement `Into<ArrayD<f32>>`.
 #[derive(Clone, Debug)]
 pub struct ContinuousActFilter<A> {
     // `true` indicates that this filter is used in a vectorized environment.
diff --git a/border-py-gym-env/src/act_c.rs b/border-py-gym-env/src/act_c.rs
deleted file mode 100644
index 574f1c13..00000000
--- a/border-py-gym-env/src/act_c.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-//! Continuous action for [`GymEnv`](crate::GymEnv).
-mod base;
-pub use base::GymContinuousAct;
-use ndarray::ArrayD;
-use numpy::PyArrayDyn;
-use pyo3::{IntoPy, PyObject};
-
-/// Convert [`ArrayD<f32>`] to [`PyObject`].
-///
-/// This function does not support batch action.
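Editor's aside (not part of the patch): a quick numeric check of the `Mat` arithmetic introduced above, mirroring `tests/test.rs` but without the tch dependency.

```rust
use border_policy_no_backend::Mat;

fn main() {
    // Row-major 2x3 matrix times a length-3 column vector; From<Vec<f32>>
    // yields shape [3, 1], so matmul produces a [2, 1] result.
    let w = Mat::new(vec![1., 2., 3., 4., 5., 6.], vec![2, 3]);
    let x: Mat = vec![7.0f32, 8., 9.].into();
    let y = w.matmul(&x).relu();
    assert_eq!(y.shape(), &vec![2, 1]);
    assert_eq!(y.data, vec![50., 122.]); // rows: 1*7+2*8+3*9, 4*7+5*8+6*9
}
```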
-pub fn to_pyobj(act: ArrayD<f32>) -> PyObject {
-    // let act = act.remove_axis(ndarray::Axis(0));
-    pyo3::Python::with_gil(|py| {
-        let act = PyArrayDyn::<f32>::from_array(py, &act);
-        act.into_py(py)
-    })
-}
diff --git a/border-py-gym-env/src/act_c/base.rs b/border-py-gym-env/src/act_c/base.rs
deleted file mode 100644
index 4919bd58..00000000
--- a/border-py-gym-env/src/act_c/base.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-use border_core::Act;
-use ndarray::ArrayD;
-use std::fmt::Debug;
-
-/// Represents an action.
-#[derive(Clone, Debug)]
-pub struct GymContinuousAct {
-    /// Stores an action.
-    pub act: ArrayD<f32>,
-}
-
-impl GymContinuousAct {
-    /// Constructs an action.
-    pub fn new(act: ArrayD<f32>) -> Self {
-        Self { act }
-    }
-}
-
-impl Act for GymContinuousAct {
-    fn len(&self) -> usize {
-        let shape = self.act.shape();
-        if shape.len() == 1 {
-            1
-        } else {
-            shape[0]
-        }
-    }
-}
diff --git a/border-py-gym-env/src/act_d.rs b/border-py-gym-env/src/act_d.rs
deleted file mode 100644
index 7cac4219..00000000
--- a/border-py-gym-env/src/act_d.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-//! Discrete action for [`GymEnv`](crate::GymEnv).
-mod base;
-pub use base::GymDiscreteAct;
diff --git a/border-py-gym-env/src/act_d/base.rs b/border-py-gym-env/src/act_d/base.rs
deleted file mode 100644
index 5afb829b..00000000
--- a/border-py-gym-env/src/act_d/base.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-use border_core::Act;
-use std::fmt::Debug;
-
-/// Represents action.
-#[derive(Clone, Debug)]
-pub struct GymDiscreteAct {
-    pub act: Vec<i32>,
-}
-
-impl GymDiscreteAct {
-    /// Constructs a discrete action.
-    pub fn new(act: Vec<i32>) -> Self {
-        Self { act }
-    }
-}
-
-impl Act for GymDiscreteAct {
-    fn len(&self) -> usize {
-        self.act.len()
-    }
-}
diff --git a/border-py-gym-env/src/atari.rs b/border-py-gym-env/src/atari.rs
index 6b64f1ad..cd3b130e 100644
--- a/border-py-gym-env/src/atari.rs
+++ b/border-py-gym-env/src/atari.rs
@@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize};
 #[derive(Debug, Serialize, Deserialize)]
 /// Specifies training or evaluation mode.
 #[derive(Clone)]
+// TODO: consider removing this enum
 pub enum AtariWrapper {
     /// Training mode
     Train,
diff --git a/border-py-gym-env/src/base.rs b/border-py-gym-env/src/base.rs
index 11eb19f2..e0d09954 100644
--- a/border-py-gym-env/src/base.rs
+++ b/border-py-gym-env/src/base.rs
@@ -22,6 +22,8 @@ pub struct GymInfo {}
 impl Info for GymInfo {}
 
 /// Convert [`PyObject`] to [`GymEnv`]::Obs with a preprocessing.
+///
+/// [`PyObject`]: https://docs.rs/pyo3/0.14.5/pyo3/type.PyObject.html
 pub trait GymObsFilter<O: Obs> {
     /// Configuration.
     type Config: Clone + Default + Serialize + DeserializeOwned;
@@ -50,7 +52,7 @@ pub trait GymObsFilter<O: Obs> {
 
 /// Convert [`GymEnv`]::Act to [`PyObject`] with a preprocessing.
 ///
-/// This trait should support vectorized environments.
+/// [`PyObject`]: https://docs.rs/pyo3/0.14.5/pyo3/type.PyObject.html
 pub trait GymActFilter<A: Act> {
     /// Configuration.
     type Config: Clone + Default + Serialize + DeserializeOwned;
@@ -79,7 +81,7 @@ pub trait GymActFilter<A: Act> {
     }
 }
 
-/// An environment in [OpenAI gym](https://github.com/openai/gym).
+/// A wrapper of [Gymnasium](https://gymnasium.farama.org).
 #[derive(Debug)]
 pub struct GymEnv<O, A, OF, AF>
 where
diff --git a/border-py-gym-env/src/lib.rs b/border-py-gym-env/src/lib.rs
index c84f0612..80c4b101 100644
--- a/border-py-gym-env/src/lib.rs
+++ b/border-py-gym-env/src/lib.rs
@@ -4,34 +4,43 @@
 //! It has been tested on some of [classic control](https://gymnasium.farama.org/environments/classic_control/) and
 //! [Gymnasium-Robotics](https://robotics.farama.org) environments.
 //!
-//! ```note
-//! In a past, [`Atari`](https://gym.openai.com/envs/#atari), and
-//! [`PyBullet`](https://github.com/benelot/pybullet-gym) environments were supported.
-//! However, currently they are not tested.
-//! ```
-//!
-//! This wrapper accepts array-like observation and action
-//! ([`Box`](https://github.com/openai/gym/blob/master/gym/spaces/box.py) spaces), and
-//! discrete action. In order to interact with Python interpreter where gym is running,
-//! [`GymObsFilter`] and [`GymActFilter`] provides interfaces for converting Python object
-//! (numpy array) to/from ndarrays in Rust. [`GymObsFilter`],
-//! [`ContinuousActFilter`] and [`DiscreteActFilter`] do the conversion for environments
-//! where observation and action are arrays. In addition to the data conversion between Python and Rust,
-//! we can implements arbitrary preprocessing in these filters. For example, [`FrameStackFilter`] keeps
-//! four consevutive observation frames (images) and outputs a stack of these frames.
-//!
-//! For Atari environments, a tweaked version of
-//! [`atari_wrapper.py`](https://github.com/taku-y/border/blob/main/examples/atari_wrappers.py)
-//! is required to be in `PYTHONPATH`. The frame stacking preprocessing is implemented in
-//! [`FrameStackFilter`] as an [`GymObsFilter`].
-//!
-//! Examples with a random controller ([`Policy`](border_core::Policy)) are in
-//! [`examples`](https://github.com/taku-y/border/blob/main/border-py-gym-env/examples) directory.
-//! Examples with `border-tch-agents`, which are collections of RL agents implemented with tch-rs,
-//! are in [here](https://github.com/taku-y/border/blob/main/border/examples).
+//! In order to bridge Python and Rust, we need to convert Python objects to Rust objects and vice versa.
+//!
+//! ## Observation
+//!
+//! An observation is created in Python and passed to Rust as a Python object. In order to convert
+//! a Python object to a Rust object, this crate provides the [`GymObsFilter`] trait. This trait has
+//! the [`GymObsFilter::filt`] method, which converts a Python object to a Rust object.
+//! The type of the Rust object after conversion corresponds to the type parameter `O` of the trait,
+//! which is also the type of the observation in the environment, i.e., [`GymEnv`]`::Obs`.
+//!
+//! There are two built-in implementations of [`GymObsFilter`]: [`ArrayObsFilter`] and [`ArrayDictObsFilter`].
+//! [`ArrayObsFilter`] is for environments where the observation is an array (e.g., CartPole).
+//! Internally, the Python object is converted to an [`ndarray::ArrayD`].
+//! Then, the array is converted to the type parameter `O` of the filter.
+//! Since `O` must implement [`From`]`<ArrayD<T2>>` by trait bound, the conversion is done
+//! by calling `array.into()`.
+//!
+//! [`ArrayDictObsFilter`] is for environments where the observation is a dictionary of arrays (e.g., FetchPickAndPlace).
+//! Internally, the dictionary is converted to `Vec<(String, border_py_gym_env::util::Array)>` from the Python object.
+//! Then, `Vec<(String, border_py_gym_env::util::Array)>` is converted to `O` by calling `into()`.
+//!
+//! ## Action
+//!
+//! An action is created in a [`Policy`] and passed to Python as a Python object. In order to convert
+//! a Rust object to a Python object, this crate provides the [`GymActFilter`] trait. This trait has
+//! the [`GymActFilter::filt`] method, which converts a Rust object of type `A` (the type parameter
+//! of the trait) to a Python object.
+//!
+//! There are two built-in implementations of [`GymActFilter`]: [`DiscreteActFilter`] and [`ContinuousActFilter`].
+//! [`DiscreteActFilter`] is for environments where the action is discrete (e.g., CartPole).
+//! This filter converts `A` to [`Vec`]`<i32>` and then to a Python object.
+//! [`ContinuousActFilter`] is for environments where the action is continuous (e.g., Pendulum).
+//! This filter converts `A` to [`ArrayD`]`<f32>` and then to a Python object.
+//!
+//! [`Policy`]: border_core::Policy
+//! [`ArrayD`]: https://docs.rs/ndarray/0.15.1/ndarray/type.ArrayD.html
 mod act;
-mod act_c;
-mod act_d;
 mod atari;
 mod base;
 mod config;
@@ -41,14 +50,10 @@ mod vec;
 pub use act::{
     ContinuousActFilter, ContinuousActFilterConfig, DiscreteActFilter, DiscreteActFilterConfig,
 };
-pub use act_c::{to_pyobj, GymContinuousAct};
-pub use act_d::GymDiscreteAct;
-pub use atari::AtariWrapper;
+use atari::AtariWrapper;
 pub use base::{GymActFilter, GymEnv, GymInfo, GymObsFilter};
 pub use config::GymEnvConfig;
 #[allow(deprecated)]
 pub use obs::{
     ArrayDictObsFilter, ArrayDictObsFilterConfig, ArrayObsFilter, ArrayObsFilterConfig,
-    FrameStackFilter, FrameStackFilterConfig, GymObs,
 };
-// pub use vec::{PyVecGymEnv, PyVecGymEnvConfig};
diff --git a/border-py-gym-env/src/obs.rs b/border-py-gym-env/src/obs.rs
index 7da2730e..f40a5d3e 100644
--- a/border-py-gym-env/src/obs.rs
+++ b/border-py-gym-env/src/obs.rs
@@ -1,10 +1,5 @@
 //! Observation for [`GymEnv`](crate::GymEnv).
 mod array_dict_filter;
 mod array_filter;
-mod base;
-mod frame_stack_filter;
 pub use array_dict_filter::{ArrayDictObsFilter, ArrayDictObsFilterConfig};
 pub use array_filter::{ArrayObsFilter, ArrayObsFilterConfig};
-#[allow(deprecated)]
-pub use base::GymObs;
-pub use frame_stack_filter::{FrameStackFilter, FrameStackFilterConfig};
diff --git a/border-py-gym-env/src/obs/array_filter.rs b/border-py-gym-env/src/obs/array_filter.rs
index 16a5d899..0cb4023a 100644
--- a/border-py-gym-env/src/obs/array_filter.rs
+++ b/border-py-gym-env/src/obs/array_filter.rs
@@ -24,6 +24,8 @@ impl Default for ArrayObsFilterConfig {
 /// An observation filter that converts a PyObject of a numpy array.
 ///
 /// Type parameter `O` must implement [`From`]`<ArrayD<T2>>` and [`border_core::Obs`].
+///
+/// [`border_core::Obs`]: border_core::Obs
 pub struct ArrayObsFilter<T1, T2, O> {
     /// Marker.
     pub phantom: PhantomData<(T1, T2, O)>,
diff --git a/border-py-gym-env/src/obs/base.rs b/border-py-gym-env/src/obs/base.rs
deleted file mode 100644
index b72fb680..00000000
--- a/border-py-gym-env/src/obs/base.rs
+++ /dev/null
@@ -1,119 +0,0 @@
-use crate::util::pyobj_to_arrayd;
-use border_core::Obs;
-use ndarray::{ArrayD, IxDyn};
-use num_traits::cast::AsPrimitive;
-use numpy::Element;
-use pyo3::PyObject;
-use std::fmt::Debug;
-use std::marker::PhantomData;
-#[cfg(feature = "tch")]
-use {std::convert::TryFrom, tch::Tensor};
-
-/// Observation represented by an [ndarray::ArrayD].
-///
-/// `S` is the shape of an observation, except for batch and process dimensions.
-/// `T` is the dtype of ndarray in the Python gym environment.
-/// For some reason, the dtype of observations in Python gym environments seems to
-/// vary, f32 or f64. To get observations in Rust side, the dtype is specified as a
-/// type parameter, instead of checking the dtype of Python array at runtime.
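Editor's aside before the deleted files below: to make the conversion chain documented above concrete, here is a hedged sketch of a custom observation newtype that would plug into `ArrayObsFilter`. The `Obs` trait methods follow the border-core 0.0.7 signatures used elsewhere in this patch; the type and field names are hypothetical.

```rust
use ndarray::ArrayD;

#[derive(Clone, Debug)]
struct MyObs(ArrayD<f32>);

// ArrayObsFilter converts the Python ndarray into ArrayD<T2> and then calls
// `.into()`, so the target type needs a From<ArrayD<T2>> impl.
impl From<ArrayD<f32>> for MyObs {
    fn from(a: ArrayD<f32>) -> Self {
        Self(a)
    }
}

impl border_core::Obs for MyObs {
    fn dummy(_n: usize) -> Self {
        Self(ArrayD::zeros(ndarray::IxDyn(&[0])))
    }
    fn len(&self) -> usize {
        self.0.shape()[0]
    }
}

// With these impls, `ArrayObsFilter<f32, f32, MyObs>` can serve as the
// observation filter of a `GymEnv`.
```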
-#[deprecated]
-#[derive(Clone, Debug)]
-pub struct GymObs<T1, T2>
-where
-    T1: Element + Debug,
-    T2: 'static + Copy,
-{
-    pub obs: ArrayD<T2>,
-    pub(crate) phantom: PhantomData<T1>,
-}
-
-#[allow(deprecated)]
-impl<T1, T2> From<ArrayD<T2>> for GymObs<T1, T2>
-where
-    T1: Element + Debug,
-    T2: 'static + Copy,
-{
-    fn from(obs: ArrayD<T2>) -> Self {
-        Self {
-            obs,
-            phantom: PhantomData,
-        }
-    }
-}
-
-#[allow(deprecated)]
-impl<T1, T2> Obs for GymObs<T1, T2>
-where
-    T1: Debug + Element,
-    T2: 'static + Copy + Debug + num_traits::Zero,
-{
-    fn dummy(_n_procs: usize) -> Self {
-        // let shape = &mut S::shape().to_vec();
-        // shape.insert(0, n_procs as _);
-        // trace!("Shape of TchPyGymEnvObs: {:?}", shape);
-        let shape = vec![0];
-        Self {
-            obs: ArrayD::zeros(IxDyn(&shape[..])),
-            phantom: PhantomData,
-        }
-    }
-
-    fn len(&self) -> usize {
-        self.obs.shape()[0]
-    }
-}
-
-/// Convert numpy array of Python into [`GymObs`].
-#[allow(deprecated)]
-impl<T1, T2> From<PyObject> for GymObs<T1, T2>
-where
-    T1: Element + AsPrimitive<T2> + std::fmt::Debug,
-    T2: 'static + Copy,
-{
-    fn from(obs: PyObject) -> Self {
-        Self {
-            obs: pyobj_to_arrayd::<T1, T2>(obs),
-            phantom: PhantomData,
-        }
-    }
-}
-
-// #[cfg(feature = "tch")]
-// impl<S, T1, T2> From<PyGymEnvObs<S, T1, T2>> for Tensor
-// where
-//     S: Shape,
-//     T1: Element + Debug,
-//     T2: 'static + Copy,
-// {
-//     fn from(obs: PyGymEnvObs<S, T1, T2>) -> Tensor {
-//         let tmp = &obs.obs;
-//         Tensor::try_from(tmp).unwrap()
-//         // Tensor::try_from(&obs.obs).unwrap()
-//     }
-// }
-
-#[allow(deprecated)]
-#[cfg(feature = "tch")]
-impl<T1> From<GymObs<T1, f32>> for Tensor
-where
-    T1: Element + Debug,
-{
-    fn from(obs: GymObs<T1, f32>) -> Tensor {
-        let tmp = &obs.obs;
-        Tensor::try_from(tmp).unwrap()
-        // Tensor::try_from(&obs.obs).unwrap()
-    }
-}
-
-#[allow(deprecated)]
-#[cfg(feature = "tch")]
-impl<T1> From<GymObs<T1, i64>> for Tensor
-where
-    T1: Element + Debug,
-{
-    fn from(obs: GymObs<T1, i64>) -> Tensor {
-        let tmp = &obs.obs;
-        Tensor::try_from(tmp).unwrap()
-        // Tensor::try_from(&obs.obs).unwrap()
-    }
-}
diff --git a/border-py-gym-env/src/obs/frame_stack_filter.rs b/border-py-gym-env/src/obs/frame_stack_filter.rs
deleted file mode 100644
index 4f42f0de..00000000
--- a/border-py-gym-env/src/obs/frame_stack_filter.rs
+++ /dev/null
@@ -1,250 +0,0 @@
-//! An observation filter with stacking observations (frames).
-#[allow(deprecated)]
-use super::GymObs;
-use crate::GymObsFilter;
-use border_core::{
-    record::{Record, RecordValue},
-    Obs,
-};
-use ndarray::{ArrayD, Axis, SliceInfoElem}; //, SliceOrIndex};
-// use ndarray::{stack, ArrayD, Axis, IxDyn, SliceInfo, SliceInfoElem};
-use num_traits::cast::AsPrimitive;
-use numpy::{Element, PyArrayDyn};
-use pyo3::{PyAny, PyObject};
-// use pyo3::{types::PyList, Py, PyAny, PyObject};
-use serde::{Deserialize, Serialize};
-use std::{fmt::Debug, marker::PhantomData};
-// use std::{convert::TryFrom, fmt::Debug, marker::PhantomData};
-
-#[allow(deprecated)]
-#[derive(Debug, Serialize, Deserialize)]
-/// Configuration of [FrameStackFilter].
-#[derive(Clone)]
-pub struct FrameStackFilterConfig {
-    n_procs: i64,
-    n_stack: i64,
-    vectorized: bool,
-}
-
-impl Default for FrameStackFilterConfig {
-    fn default() -> Self {
-        Self {
-            n_procs: 1,
-            n_stack: 4,
-            vectorized: false,
-        }
-    }
-}
-
-/// An observation filter with stacking sequence of original observations.
-///
-/// The first element of the shape `S` denotes the number of stacks (`n_stack`) and the following elements
-/// denote the shape of the partial observation, which is the observation of each environment
-/// in the vectorized environment.
-#[allow(deprecated)]
-#[derive(Debug)]
-pub struct FrameStackFilter<T1, T2, U>
-where
-    T1: Element + Debug + num_traits::identities::Zero + AsPrimitive<T2>,
-    T2: 'static + Copy + num_traits::Zero,
-    U: Obs + From<GymObs<T1, T2>>,
-{
-    // Each element in the vector corresponds to a process.
-    buffers: Vec<Option<ArrayD<T2>>>,
-
-    #[allow(dead_code)]
-    n_procs: i64,
-
-    n_stack: i64,
-
-    shape: Option<Vec<usize>>,
-
-    // Vectorized environment is not supported
-    vectorized: bool,
-
-    phantom: PhantomData<(T1, U)>,
-}
-
-#[allow(deprecated)]
-impl<T1, T2, U> FrameStackFilter<T1, T2, U>
-where
-    T1: Element + Debug + num_traits::identities::Zero + AsPrimitive<T2>,
-    T2: 'static + Copy + num_traits::Zero,
-    U: Obs + From<GymObs<T1, T2>>,
-{
-    /// Returns the default configuration.
-    pub fn default_config() -> FrameStackFilterConfig {
-        FrameStackFilterConfig::default()
-    }
-
-    /// Create slice for a dynamic array: equivalent to arr[j:(j+1), ::] in numpy.
-    ///
-    /// See
-    fn s(shape: &Option<Vec<usize>>, j: usize) -> Vec<SliceInfoElem> {
-        // The first index of self.shape corresponds to stacking dimension,
-        // specific index.
-        let mut slicer = vec![SliceInfoElem::Index(j as isize)];
-
-        // For remaining dimensions, all elements will be taken.
-        let n = shape.as_ref().unwrap().len() - 1;
-        let (start, end, step) = (0, None, 1);
-
-        slicer.extend(vec![SliceInfoElem::Slice { start, end, step }; n]);
-        slicer
-    }
-
-    /// Update the buffer of the stacked observations.
-    ///
-    /// * `i` - Index of process.
-    fn update_buffer(&mut self, i: i64, obs: &ArrayD<T2>) {
-        let arr = if let Some(arr) = &mut self.buffers[i as usize] {
-            arr
-        } else {
-            let mut shape = obs.shape().to_vec();
-            self.shape = Some(shape.clone());
-            shape.insert(0, self.n_stack as _);
-            self.buffers[i as usize] = Some(ArrayD::zeros(shape));
-            self.buffers[i as usize].as_mut().unwrap()
-        };
-
-        // Shift stacks frame(j) <- frame(j - 1) for j=1,..,(n_stack - 1)
-        for j in (1..self.n_stack as usize).rev() {
-            let dst_slice = Self::s(&self.shape, j);
-            let src_slice = Self::s(&self.shape, j - 1);
-            let (mut dst, src) = arr.multi_slice_mut((dst_slice.as_slice(), src_slice.as_slice()));
-            dst.assign(&src);
-        }
-        arr.slice_mut(Self::s(&self.shape, 0).as_slice())
-            .assign(obs)
-    }
-
-    /// Fill the buffer, invoked when resetting
-    fn fill_buffer(&mut self, i: i64, obs: &ArrayD<T2>) {
-        if let Some(arr) = &mut self.buffers[i as usize] {
-            for j in (0..self.n_stack as usize).rev() {
-                let mut dst = arr.slice_mut(Self::s(&self.shape, j).as_slice());
-                dst.assign(&obs);
-            }
-        } else {
-            unimplemented!("fill_buffer() was called before receiving the first sample.");
-        }
-    }
-
-    /// Get ndarray from pyobj
-    fn get_ndarray(o: &PyAny) -> ArrayD<T2> {
-        debug_assert_eq!(o.get_type().name().unwrap(), "ndarray");
-        let o: &PyArrayDyn<T1> = o.extract().unwrap();
-        let o = o.to_owned_array();
-        let o = o.mapv(|elem| elem.as_());
-        o
-    }
-}
-
-#[allow(deprecated)]
-impl<T1, T2, U> GymObsFilter<U> for FrameStackFilter<T1, T2, U>
-where
-    T1: Element + Debug + num_traits::identities::Zero + AsPrimitive<T2>,
-    T2: 'static + Copy + num_traits::Zero + Into<f32>,
-    U: Obs + From<GymObs<T1, T2>>,
-{
-    type Config = FrameStackFilterConfig;
-
-    fn build(config: &Self::Config) -> anyhow::Result<Self>
-    where
-        Self: Sized,
-    {
-        Ok(FrameStackFilter {
-            buffers: vec![None; config.n_procs as usize],
-            n_procs: config.n_procs,
-            n_stack: config.n_stack,
-            shape: None,
-            vectorized: config.vectorized,
-            phantom: PhantomData,
-        })
-    }
-
-    fn filt(&mut self, obs: PyObject) -> (U, Record) {
-        if self.vectorized {
-            unimplemented!();
-            // // Processes the input observation to update `self.buffer`
-            // pyo3::Python::with_gil(|py| {
-            //     debug_assert_eq!(obs.as_ref(py).get_type().name().unwrap(), "list");
-
-            //     let obs: Py<PyList> = obs.extract(py).unwrap();
-
-            //     for (i, o) in (0..self.n_procs).zip(obs.as_ref(py).iter()) {
-            //         let o = Self::get_ndarray(o);
-            //         self.update_buffer(i, &o);
-            //     }
-            // });
-
-            // // Returned values
-            // let array_views: Vec<_> = self.buffer.iter().map(|a| a.view()).collect();
-            // let obs = PyGymEnvObs::from(stack(Axis(0), array_views.as_slice()).unwrap());
-            // let obs = U::from(obs);
-
-            // // TODO: add contents in the record
-            // let record = Record::empty();
-
-            // (obs, record)
-        } else {
-            // Update the buffer with obs
-            pyo3::Python::with_gil(|py| {
-                debug_assert_eq!(obs.as_ref(py).get_type().name().unwrap(), "ndarray");
-                let o = Self::get_ndarray(obs.as_ref(py));
-                self.update_buffer(0, &o);
-            });
-
-            // Returns stacked observation in the buffer
-            // img.shape() = [1, 4, 1, 84, 84]
-            // [batch_size, n_stack, color_ch, width, height]
-            let img = self.buffers[0].clone().unwrap().insert_axis(Axis(0));
-            let data = img.iter().map(|&e| e.into()).collect::<Vec<f32>>();
-            let shape = [img.shape()[3] * self.n_stack as usize, img.shape()[4]];
-
-            let obs = GymObs::from(img);
-            let obs = U::from(obs);
-
-            // TODO: add contents in the record
-            let mut record = Record::empty();
-            record.insert("frame_stack_filter_out", RecordValue::Array2(data, shape));
-
-            (obs, record)
-        }
-    }
-
-    fn reset(&mut self, obs: PyObject) -> U {
-        if self.vectorized {
-            unimplemented!();
-            // pyo3::Python::with_gil(|py| {
-            //     debug_assert_eq!(obs.as_ref(py).get_type().name().unwrap(), "list");
-
-            //     let obs: Py<PyList> = obs.extract(py).unwrap();
-
-            //     for (i, o) in (0..self.n_procs).zip(obs.as_ref(py).iter()) {
-            //         if o.get_type().name().unwrap() != "NoneType" {
-            //             let o = Self::get_ndarray(o);
-            //             self.fill_buffer(i, &o);
-            //         }
-            //     }
-            // });
-
-            // // Returned values
-            // let array_views: Vec<_> = self.buffer.iter().map(|a| a.view()).collect();
-            // O::from(stack(Axis(0), array_views.as_slice()).unwrap())
-        } else {
-            // Update the buffer if obs is not None, otherwise do nothing
-            pyo3::Python::with_gil(|py| {
-                if obs.as_ref(py).get_type().name().unwrap() != "NoneType" {
-                    debug_assert_eq!(obs.as_ref(py).get_type().name().unwrap(), "ndarray");
-                    let o = Self::get_ndarray(obs.as_ref(py));
-                    self.fill_buffer(0, &o);
-                }
-            });
-
-            // Returns stacked observation in the buffer
-            let frames = self.buffers[0].clone().unwrap().insert_axis(Axis(0));
-            U::from(GymObs::from(frames))
-        }
-    }
-}
diff --git a/border-py-gym-env/src/util.rs b/border-py-gym-env/src/util.rs
index 15740b8a..117eed69 100644
--- a/border-py-gym-env/src/util.rs
+++ b/border-py-gym-env/src/util.rs
@@ -1,3 +1,4 @@
+//! Utility functions mainly for data conversion between Python and Rust.
 use ndarray::{concatenate, ArrayD, Axis};
 use num_traits::cast::AsPrimitive;
 use numpy::{Element, PyArrayDyn};
diff --git a/border-tch-agent/src/dqn/base.rs b/border-tch-agent/src/dqn/base.rs
index b43663ca..3efa4e06 100644
--- a/border-tch-agent/src/dqn/base.rs
+++ b/border-tch-agent/src/dqn/base.rs
@@ -341,6 +341,10 @@ where
         record
     }
 
+    /// Save model parameters in the given directory.
+    ///
+    /// The parameters of the model are saved as `qnet.pt`.
+    /// The parameters of the target model are saved as `qnet_tgt.pt`.
    fn save_params<T: AsRef<Path>>(&self, path: T) -> Result<()> {
        // TODO: consider to rename the path if it already exists
        fs::create_dir_all(&path)?;
diff --git a/border-tch-agent/src/dqn/model/base.rs b/border-tch-agent/src/dqn/model/base.rs
index 4d67fa52..56180183 100644
--- a/border-tch-agent/src/dqn/model/base.rs
+++ b/border-tch-agent/src/dqn/model/base.rs
@@ -11,8 +11,12 @@ use serde::{de::DeserializeOwned, Serialize};
 use std::{marker::PhantomData, path::Path};
 use tch::{nn, Device, Tensor};
 
-#[allow(clippy::upper_case_acronyms)]
-/// Represents value functions for DQN agents.
+/// Action value function model for DQN.
+///
+/// The architecture of the model is defined by the type parameter `Q`,
+/// which should implement [`SubModel`].
+/// This takes [`SubModel::Input`] as input and outputs a tensor.
+/// The output tensor should have the same dimension as the number of actions.
 pub struct DqnModel<Q>
 where
     Q: SubModel<Output = Tensor>,
@@ -75,7 +79,7 @@ where
         }
     }
 
-    /// Outputs the action-value given an observation.
+    /// Outputs the action-value given observation(s).
     pub fn forward(&self, x: &Q::Input) -> Tensor {
         let a = self.q.forward(&x);
         debug_assert_eq!(a.size().as_slice()[1], self.out_dim);
diff --git a/border-tch-agent/src/dqn/model/config.rs b/border-tch-agent/src/dqn/model/config.rs
index 554bfa26..ce31802c 100644
--- a/border-tch-agent/src/dqn/model/config.rs
+++ b/border-tch-agent/src/dqn/model/config.rs
@@ -8,7 +8,7 @@ use std::{
 };
 
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
-/// Configuration of [DqnModel](super::DqnModel).
+/// Configuration of [`DqnModel`](super::DqnModel).
 pub struct DqnModelConfig<Q>
 where
     // Q: SubModel,
diff --git a/border-tch-agent/src/iqn/model/config.rs b/border-tch-agent/src/iqn/model/config.rs
index 19d11879..858e1dcd 100644
--- a/border-tch-agent/src/iqn/model/config.rs
+++ b/border-tch-agent/src/iqn/model/config.rs
@@ -25,7 +25,7 @@ where
 }
 
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
-/// Configuration of [IqnModel](super::IqnModel).
+/// Configuration of [`IqnModel`](super::IqnModel).
 ///
 /// The type parameter `F` represents a configuration struct of a feature extractor.
 /// The type parameter `M` represents a configuration struct of a model for merging
diff --git a/border-tch-agent/src/mlp/base.rs b/border-tch-agent/src/mlp/base.rs
index 674f426b..40e513d4 100644
--- a/border-tch-agent/src/mlp/base.rs
+++ b/border-tch-agent/src/mlp/base.rs
@@ -2,7 +2,7 @@ use super::{mlp, MlpConfig};
 use crate::model::{SubModel, SubModel2};
 use tch::{nn, nn::Module, Device, Tensor};
 
-/// Multilayer perceptron.
+/// Multilayer perceptron with ReLU activation function.
 pub struct Mlp {
     config: MlpConfig,
     device: Device,
diff --git a/border-tch-agent/src/model/base.rs b/border-tch-agent/src/model/base.rs
index 0b375fca..99753100 100644
--- a/border-tch-agent/src/model/base.rs
+++ b/border-tch-agent/src/model/base.rs
@@ -51,53 +51,61 @@ pub trait Model2: ModelBase {
     fn forward(&self, x1s: &Self::Input1, x2s: &Self::Input2) -> Self::Output;
 }
 
-/// Neural network model that can be initialized with [VarStore] and configuration.
+/// Neural network model that can be initialized with [`VarStore`] and configuration.
 ///
 /// The purpose of this trait is for modularity of neural network models.
-/// Modules, which consists a neural network, should share [VarStore].
-/// To do this, structs implementing this trait can be initialized with a given [VarStore].
-/// This trait also provide the ability to clone with a given [VarStore].
+/// Modules that make up a neural network should share a [`VarStore`].
+/// To do this, structs implementing this trait can be initialized with a given [`VarStore`].
+/// This trait also provides the ability to clone with a given [`VarStore`].
 /// The ability is useful when creating a target network, which is common in recent deep RL algorithms.
+///
+/// [`VarStore`]: https://docs.rs/tch/0.16.0/tch/nn/struct.VarStore.html
 pub trait SubModel {
-    /// Configuration from which [SubModel] is constructed.
+    /// Configuration from which [`SubModel`] is constructed.
     type Config;
 
-    /// Input of the [SubModel].
+    /// Input of the [`SubModel`].
     type Input;
 
-    /// Output of the [SubModel].
+    /// Output of the [`SubModel`].
     type Output;
 
-    /// Builds [SubModel] with [VarStore] and [SubModel::Config].
+    /// Builds [`SubModel`] with [`VarStore`] and [`SubModel::Config`].
     fn build(var_store: &VarStore, config: Self::Config) -> Self;
 
-    /// Clones [SubModel] with [VarStore].
+    /// Clones [`SubModel`] with [`VarStore`].
+    ///
+    /// [`VarStore`]: https://docs.rs/tch/0.16.0/tch/nn/struct.VarStore.html
     fn clone_with_var_store(&self, var_store: &VarStore) -> Self;
 
     /// A generalized forward function.
     fn forward(&self, input: &Self::Input) -> Self::Output;
 }
 
-/// Neural network model that can be initialized with [VarStore] and configuration.
+/// Neural network model that can be initialized with [`VarStore`] and configuration.
 ///
-/// The difference from [SubModel] is that this trait takes two inputs.
+/// The difference from [`SubModel`] is that this trait takes two inputs.
+///
+/// [`VarStore`]: https://docs.rs/tch/0.16.0/tch/nn/struct.VarStore.html
 pub trait SubModel2 {
-    /// Configuration from which [SubModel2] is constructed.
+    /// Configuration from which [`SubModel2`] is constructed.
     type Config;
 
-    /// Input of the [SubModel2].
+    /// Input of the [`SubModel2`].
     type Input1;
 
-    /// Input of the [SubModel2].
+    /// Input of the [`SubModel2`].
     type Input2;
 
-    /// Output of the [SubModel2].
+    /// Output of the [`SubModel2`].
     type Output;
 
-    /// Builds [SubModel2] with [VarStore] and [SubModel2::Config].
+    /// Builds [`SubModel2`] with [`VarStore`] and [`SubModel2::Config`].
     fn build(var_store: &VarStore, config: Self::Config) -> Self;
 
-    /// Clones [SubModel2] with [VarStore].
+    /// Clones [`SubModel2`] with [`VarStore`].
+    ///
+    /// [`VarStore`]: https://docs.rs/tch/0.16.0/tch/nn/struct.VarStore.html
     fn clone_with_var_store(&self, var_store: &VarStore) -> Self;
 
     /// A generalized forward function.
diff --git a/border-tch-agent/src/opt.rs b/border-tch-agent/src/opt.rs
index 7b48b9c4..6a077c69 100644
--- a/border-tch-agent/src/opt.rs
+++ b/border-tch-agent/src/opt.rs
@@ -60,6 +60,8 @@ impl OptimizerConfig {
 /// Optimizers.
 ///
 /// This is a thin wrapper of [tch::nn::Optimizer].
+///
+/// [tch::nn::Optimizer]: https://docs.rs/tch/0.16.0/tch/nn/struct.Optimizer.html
 pub enum Optimizer {
     /// Adam optimizer.
     Adam(Optimizer_),
diff --git a/border-tch-agent/src/sac.rs b/border-tch-agent/src/sac.rs
index bd2b31ea..bf3e8215 100644
--- a/border-tch-agent/src/sac.rs
+++ b/border-tch-agent/src/sac.rs
@@ -1,10 +1,156 @@
 //! SAC agent.
 //!
-//! Here is an example in `border/examples/sac_pendulum.rs`
+//! Here is an example of creating a SAC agent:
 //!
-//! ```rust,ignore
+//! ```no_run
+//! # use anyhow::Result;
+//! use border_core::{
+//! #     Env as Env_, Obs as Obs_, Act as Act_, Step, test::{
+//! #         TestAct as TestAct_, TestActBatch as TestActBatch_,
+//! #         TestEnv as TestEnv_,
+//! #         TestObs as TestObs_, TestObsBatch as TestObsBatch_,
+//! #     },
+//! #     record::Record,
+//! #     generic_replay_buffer::{SimpleReplayBuffer, BatchBase},
+//!     Configurable,
+//! };
+//! use border_tch_agent::{
+//!     sac::{ActorConfig, CriticConfig, Sac, SacConfig},
+//!     mlp::{Mlp, Mlp2, MlpConfig},
+//!     opt::OptimizerConfig
+//! };
+//!
+//! # struct TestEnv(TestEnv_);
+//! # #[derive(Clone, Debug)]
+//! # struct TestObs(TestObs_);
+//! # #[derive(Clone, Debug)]
+//! # struct TestAct(TestAct_);
+//! # struct TestObsBatch(TestObsBatch_);
+//! # struct TestActBatch(TestActBatch_);
+//! #
+//! # impl Obs_ for TestObs {
+//! #     fn dummy(n: usize) -> Self {
+//! #         Self(TestObs_::dummy(n))
+//! #     }
+//! #
+//! #     fn len(&self) -> usize {
+//! #         self.0.len()
+//! #     }
+//! # }
+//! #
+//! # impl Into<tch::Tensor> for TestObs {
+//! #     fn into(self) -> tch::Tensor {
+//! #         unimplemented!();
+//! #     }
+//! # }
+//! #
+//! # impl BatchBase for TestObsBatch {
+//! #     fn new(n: usize) -> Self {
+//! #         Self(TestObsBatch_::new(n))
+//! #     }
+//! #
+//! #     fn push(&mut self, ix: usize, data: Self) {
+//! #         self.0.push(ix, data.0);
+//! #     }
+//! #
+//! #     fn sample(&self, ixs: &Vec<usize>) -> Self {
+//! #         Self(self.0.sample(ixs))
+//! #     }
+//! # }
+//! #
+//! # impl BatchBase for TestActBatch {
+//! #     fn new(n: usize) -> Self {
+//! #         Self(TestActBatch_::new(n))
+//! #     }
+//! #
+//! #     fn push(&mut self, ix: usize, data: Self) {
+//! #         self.0.push(ix, data.0);
+//! #     }
+//! #
+//! #     fn sample(&self, ixs: &Vec<usize>) -> Self {
+//! #         Self(self.0.sample(ixs))
+//! #     }
+//! # }
+//! #
+//! # impl Act_ for TestAct {
+//! #     fn len(&self) -> usize {
+//! #         self.0.len()
+//! #     }
+//! # }
+//! #
+//! # impl From<tch::Tensor> for TestAct {
+//! #     fn from(t: tch::Tensor) -> Self {
+//! #         unimplemented!();
+//! #     }
+//! # }
+//! #
+//! # impl Into<tch::Tensor> for TestAct {
+//! #     fn into(self) -> tch::Tensor {
+//! #         unimplemented!();
+//! #     }
+//! # }
+//! #
+//! # impl Env_ for TestEnv {
+//! #     type Config = <TestEnv_ as Env_>::Config;
+//! #     type Obs = TestObs;
+//! #     type Act = TestAct;
+//! #     type Info = <TestEnv_ as Env_>::Info;
+//! #
+//! #     fn build(config: &Self::Config, seed: i64) -> Result<Self> {
+//! #         Ok(Self(TestEnv_::build(&config, seed).unwrap()))
+//! #     }
+//! #
+//! #     fn step(&mut self, act: &TestAct) -> (Step<Self>, Record) {
+//! #         let (step, record) = self.0.step(&act.0);
+//! #         let step = Step {
+//! #             obs: TestObs(step.obs),
+//! #             act: TestAct(step.act),
+//! #             reward: step.reward,
+//! #             is_terminated: step.is_terminated,
+//! #             is_truncated: step.is_truncated,
+//! #             info: step.info,
+//! #             init_obs: TestObs(step.init_obs),
+//! #         };
+//! #         (step, record)
+//! #     }
+//! #
+//! #     fn reset(&mut self, is_done: Option<&Vec<i8>>) -> Result<TestObs> {
+//! #         Ok(TestObs(self.0.reset(is_done).unwrap()))
+//! #     }
+//! #
+//! #     fn step_with_reset(&mut self, a: &TestAct) -> (Step<Self>, Record) {
+//! #         let (step, record) = self.0.step_with_reset(&a.0);
+//! #         let step = Step {
+//! #             obs: TestObs(step.obs),
+//! #             act: TestAct(step.act),
+//! #             reward: step.reward,
+//! #             is_terminated: step.is_terminated,
+//! #             is_truncated: step.is_truncated,
+//! #             info: step.info,
+//! #             init_obs: TestObs(step.init_obs),
+//! #         };
+//! #         (step, record)
+//! #     }
+//! #
+//! #     fn reset_with_index(&mut self, ix: usize) -> Result<TestObs> {
+//! #         Ok(TestObs(self.0.reset_with_index(ix).unwrap()))
+//! #     }
+//! # }
+//! #
+//! # type Env = TestEnv;
+//! # type ObsBatch = TestObsBatch;
+//! # type ActBatch = TestActBatch;
+//! # type ReplayBuffer = SimpleReplayBuffer<ObsBatch, ActBatch>;
+//! #
+//! const DIM_OBS: i64 = 3;
+//! const DIM_ACT: i64 = 1;
+//! const LR_ACTOR: f64 = 1e-3;
+//! const LR_CRITIC: f64 = 1e-3;
+//! const BATCH_SIZE: usize = 256;
+//!
 //! fn create_agent(in_dim: i64, out_dim: i64) -> Sac<Env, Mlp, Mlp2, ReplayBuffer> {
 //!     let device = tch::Device::cuda_if_available();
+//!
 //!     let actor_config = ActorConfig::default()
 //!         .opt_config(OptimizerConfig::Adam { lr: LR_ACTOR })
 //!         .out_dim(out_dim)
@@ -12,25 +158,13 @@
 //!     let critic_config = CriticConfig::default()
 //!         .opt_config(OptimizerConfig::Adam { lr: LR_CRITIC })
 //!         .q_config(MlpConfig::new(in_dim + out_dim, vec![64, 64], 1, true));
-//!     let sac_config = SacConfig::default()
+//!     let sac_config = SacConfig::<Mlp, Mlp2>::default()
 //!         .batch_size(BATCH_SIZE)
-//!         .min_transitions_warmup(N_TRANSITIONS_WARMUP)
 //!         .actor_config(actor_config)
 //!         .critic_config(critic_config)
 //!         .device(device);
 //!     Sac::build(sac_config)
 //! }
-//!
-//! fn train(max_opts: usize, model_dir: &str, eval_interval: usize) -> Result<()> {
-//!     let trainer = //...
-//!     let mut agent = create_agent(DIM_OBS, DIM_ACT);
-//!     let mut recorder = TensorboardRecorder::new(model_dir);
-//!     let mut evaluator = Evaluator::new(&env_config(), 0, N_EPISODES_PER_EVAL)?;
-//!
-//!     trainer.train(&mut agent, &mut recorder, &mut evaluator)?;
-//!
-//!     Ok(())
-//! }
 //! ```
 mod actor;
 mod base;
diff --git a/border-tch-agent/src/sac/actor/base.rs b/border-tch-agent/src/sac/actor/base.rs
index 3cbf5129..756bbb77 100644
--- a/border-tch-agent/src/sac/actor/base.rs
+++ b/border-tch-agent/src/sac/actor/base.rs
@@ -10,8 +10,7 @@ use serde::{de::DeserializeOwned, Serialize};
 use std::path::Path;
 use tch::{nn, Device, Tensor};
 
-#[allow(clippy::upper_case_acronyms)]
-/// Represents a stochastic policy for SAC agents.
+/// Stochastic policy for SAC agents.
 pub struct Actor<P>
 where
     P: SubModel,
@@ -36,7 +35,7 @@ where
     P: SubModel,
     P::Config: DeserializeOwned + Serialize + OutDim,
 {
-    /// Constructs [Actor].
+    /// Constructs [`Actor`].
     pub fn build(config: ActorConfig<P::Config>, device: Device) -> Result<Actor<P>> {
         let pi_config = config.pi_config.context("pi_config is not set.")?;
         let out_dim = pi_config.get_out_dim();
diff --git a/border-tch-agent/src/sac/actor/config.rs b/border-tch-agent/src/sac/actor/config.rs
index 8d026a7f..05aec6e8 100644
--- a/border-tch-agent/src/sac/actor/config.rs
+++ b/border-tch-agent/src/sac/actor/config.rs
@@ -9,7 +9,7 @@ use std::{
 
 #[allow(clippy::upper_case_acronyms)]
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
-/// Configuration of [Actor](super::Actor).
+/// Configuration of [`Actor`](super::Actor).
 pub struct ActorConfig<P> {
     pub pi_config: Option
<P>,
     pub opt_config: OptimizerConfig,
diff --git a/border-tch-agent/src/sac/base.rs b/border-tch-agent/src/sac/base.rs
index 8483cae6..03b4bffc 100644
--- a/border-tch-agent/src/sac/base.rs
+++ b/border-tch-agent/src/sac/base.rs
@@ -191,6 +191,10 @@ where
             ),
         ])
     }
+
+    pub fn get_policy_net(&self) -> &Actor<P> {
+        &self.pi
+    }
 }
 
 impl<E, Q, P, R> Policy<E> for Sac<E, Q, P, R>
diff --git a/border-tch-agent/src/sac/config.rs b/border-tch-agent/src/sac/config.rs
index fb998577..4686ee32 100644
--- a/border-tch-agent/src/sac/config.rs
+++ b/border-tch-agent/src/sac/config.rs
@@ -18,8 +18,7 @@ use std::{
 };
 use tch::Tensor;
 
-/// Constructs [Sac](super::Sac).
-#[allow(clippy::upper_case_acronyms)]
+/// Configuration of [`Sac`](super::Sac).
 #[derive(Debug, Deserialize, Serialize, PartialEq)]
 pub struct SacConfig<Q, P>
 where
diff --git a/border-tch-agent/src/sac/critic/config.rs b/border-tch-agent/src/sac/critic/config.rs
index 5d0b2d8c..20045aa4 100644
--- a/border-tch-agent/src/sac/critic/config.rs
+++ b/border-tch-agent/src/sac/critic/config.rs
@@ -9,7 +9,7 @@ use std::{
 
 #[allow(clippy::upper_case_acronyms)]
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
-/// Configuration of [Critic](super::Critic).
+/// Configuration of [`Critic`](super::Critic).
 pub struct CriticConfig<Q> {
     pub q_config: Option<Q>,
     pub opt_config: OptimizerConfig,
diff --git a/border-tch-agent/src/tensor_batch.rs b/border-tch-agent/src/tensor_batch.rs
index ebb257ee..e1c3d38c 100644
--- a/border-tch-agent/src/tensor_batch.rs
+++ b/border-tch-agent/src/tensor_batch.rs
@@ -1,7 +1,9 @@
 use border_core::generic_replay_buffer::BatchBase;
 use tch::{Device, Tensor};
 
-/// Adds capability of constructing [Tensor] with a static method.
+/// Adds capability of constructing [`Tensor`] with a static method.
+///
+/// [`Tensor`]: https://docs.rs/tch/0.16.0/tch/struct.Tensor.html
 pub trait ZeroTensor {
     /// Constructs zero tensor.
     fn zeros(shape: &[i64]) -> Tensor;
@@ -37,6 +39,8 @@ impl ZeroTensor for i64 {
 /// where `shape` is obtained from the data pushed at the first time via
 /// [`TensorBatch::push`] method. `[1..]` means that the first axis of the
 /// given data is ignored as it might be batch size.
+///
+/// [`Tensor`]: https://docs.rs/tch/0.16.0/tch/struct.Tensor.html
 pub struct TensorBatch {
     buf: Option<Tensor>,
     capacity: i64,
diff --git a/border-tch-agent/src/util.rs b/border-tch-agent/src/util.rs
index 795df663..dcc3d3e1 100644
--- a/border-tch-agent/src/util.rs
+++ b/border-tch-agent/src/util.rs
@@ -21,9 +21,11 @@ pub enum CriticLoss {
     SmoothL1,
 }
 
-/// Apply soft update on a model.
+/// Apply soft update on variables.
 ///
 /// Variables are identified by their names.
+///
+/// `dest = tau * src + (1.0 - tau) * dest`
 pub fn track<M: ModelBase>(dest: &mut M, src: &mut M, tau: f64) {
     let src = &mut src.get_var_store().variables();
     let dest = &mut dest.get_var_store().variables();
@@ -47,15 +49,16 @@ pub fn concat_slices(s1: &[i64], s2: &[i64]) -> Vec<i64> {
     v
 }
 
-/// Returns the dimension of output vectors, i.e., the number of discrete outputs.
+/// Interface for handling output dimensions.
 pub trait OutDim {
-    /// Returns the dimension of output vectors, i.e., the number of discrete outputs.
+    /// Returns the output dimension.
     fn get_out_dim(&self) -> i64;
 
     /// Sets the output dimension.
     fn set_out_dim(&mut self, v: i64);
 }
 
+/// Returns the mean and standard deviation of the parameters.
 pub fn param_stats(var_store: &VarStore) -> Record {
     let mut record = Record::empty();
 
diff --git a/border/Cargo.toml b/border/Cargo.toml
index 121e84b0..c888053c 100644
--- a/border/Cargo.toml
+++ b/border/Cargo.toml
@@ -109,6 +109,23 @@ path = "examples/mujoco/sac_mujoco_tch.rs"
 required-features = ["tch"]
 test = false
 
+[[example]]
+name = "convert_sac_policy_to_edge"
+path = "examples/gym/convert_sac_policy_to_edge.rs"
+required-features = ["border-tch-agent", "tch"]
+test = false
+
+# [[example]]
+# name = "sac_ant_async"
+# path = "examples/mujoco/sac_ant_async.rs"
+# required-features = ["tch", "border-async-trainer"]
+# test = false
+
+[[example]]
+name = "pendulum_edge"
+path = "examples/gym/pendulum_edge.rs"
+test = false
+
 # [[example]]
 # name = "sac_ant_async"
 # path = "examples/mujoco/sac_ant_async.rs"
@@ -133,6 +150,7 @@ border-derive = { version = "0.0.7", path = "../border-derive" }
 border-core = { version = "0.0.7", path = "../border-core" }
 border-tensorboard = { version = "0.0.7", path = "../border-tensorboard" }
 border-tch-agent = { version = "0.0.7", path = "../border-tch-agent" }
+border-policy-no-backend = { version = "0.0.7", path = "../border-policy-no-backend" }
 border-py-gym-env = { version = "0.0.7", path = "../border-py-gym-env" }
 border-atari-env = { version = "0.0.7", path = "../border-atari-env" }
 border-candle-agent = { version = "0.0.7", path = "../border-candle-agent" }
@@ -149,6 +167,7 @@ chrono = { workspace = true }
 tensorboard-rs = { workspace = true }
 thiserror = { workspace = true }
 serde_yaml = { workspace = true }
+bincode = { workspace = true }
 
 [package.metadata.docs.rs]
 features = ["doc-only"]
@@ -157,3 +176,4 @@ features = ["doc-only"]
 doc-only = ["tch/doc-only"]
 cuda = ["candle-core/cuda"]
 cudnn = ["candle-core/cudnn"]
+border-tch-agent = []
diff --git a/border/examples/gym/convert_sac_policy_to_edge.rs b/border/examples/gym/convert_sac_policy_to_edge.rs
new file mode 100644
index 00000000..8a7294ca
--- /dev/null
+++ b/border/examples/gym/convert_sac_policy_to_edge.rs
@@ -0,0 +1,215 @@
+use anyhow::Result;
+use border_core::{Agent, Configurable};
+use border_policy_no_backend::Mlp;
+use border_tch_agent::{
+    mlp,
+    model::ModelBase,
+    sac::{ActorConfig, CriticConfig, SacConfig},
+};
+use std::{fs, io::Write};
+
+const DIM_OBS: i64 = 3;
+const DIM_ACT: i64 = 1;
+
+// Dummy types
+mod dummy {
+    use super::mlp::{Mlp, Mlp2};
+    use border_tch_agent::sac::Sac as Sac_;
+
+    #[derive(Clone, Debug)]
+    pub struct DummyObs;
+
+    impl border_core::Obs for DummyObs {
+        fn dummy(_n: usize) -> Self {
+            unimplemented!();
+        }
+
+        fn len(&self) -> usize {
+            unimplemented!();
+        }
+    }
+
+    impl Into<tch::Tensor> for DummyObs {
+        fn into(self) -> tch::Tensor {
+            unimplemented!();
+        }
+    }
+
+    #[derive(Clone, Debug)]
+    pub struct DummyAct;
+
+    impl border_core::Act for DummyAct {
+        fn len(&self) -> usize {
+            unimplemented!();
+        }
+    }
+
+    impl Into<tch::Tensor> for DummyAct {
+        fn into(self) -> tch::Tensor {
+            unimplemented!();
+        }
+    }
+
+    impl From<tch::Tensor> for DummyAct {
+        fn from(_value: tch::Tensor) -> Self {
+            unimplemented!();
+        }
+    }
+
+    #[derive(Clone)]
+    pub struct DummyInnerBatch;
+
+    impl Into<tch::Tensor> for DummyInnerBatch {
+        fn into(self) -> tch::Tensor {
+            unimplemented!();
+        }
+    }
+
+    pub struct DummyBatch;
+
+    impl border_core::TransitionBatch for DummyBatch {
+        type ObsBatch = DummyInnerBatch;
+        type ActBatch = DummyInnerBatch;
+
+        fn len(&self) -> usize {
+            unimplemented!();
+        }
+
+        fn obs(&self) -> &Self::ObsBatch {
+            unimplemented!();
+        }
+
+        fn unpack(
+            self,
+        ) -> (
+            Self::ObsBatch,
+            Self::ActBatch,
+            Self::ObsBatch,
+            Vec<f32>,
+            Vec<i8>,
+            Vec<i8>,
+            Option<Vec<usize>>,
+            Option<Vec<f32>>,
+        ) {
+            unimplemented!();
+        }
+    }
+
+    pub struct DummyReplayBuffer;
+
+    impl border_core::ReplayBufferBase for DummyReplayBuffer {
+        type Batch = DummyBatch;
+        type Config = usize;
+
+        fn batch(&mut self, _size: usize) -> anyhow::Result<Self::Batch> {
+            unimplemented!();
+        }
+
+        fn build(_config: &Self::Config) -> Self {
+            unimplemented!();
+        }
+
+        fn update_priority(&mut self, _ixs: &Option<Vec<usize>>, _td_err: &Option<Vec<f32>>) {
+            unimplemented!();
+        }
+    }
+
+    #[derive(Clone, Debug)]
+    pub struct DummyInfo;
+
+    impl border_core::Info for DummyInfo {}
+
+    pub struct DummyEnv;
+
+    impl border_core::Env for DummyEnv {
+        type Config = usize;
+        type Act = DummyAct;
+        type Obs = DummyObs;
+        type Info = DummyInfo;
+
+        fn build(_config: &Self::Config, _seed: i64) -> anyhow::Result<Self>
+        where
+            Self: Sized,
+        {
+            unimplemented!();
+        }
+
+        fn reset(&mut self, _is_done: Option<&Vec<i8>>) -> anyhow::Result<Self::Obs> {
+            unimplemented!();
+        }
+
+        fn reset_with_index(&mut self, _ix: usize) -> anyhow::Result<Self::Obs> {
+            unimplemented!();
+        }
+
+        fn step(&mut self, _a: &Self::Act) -> (border_core::Step<Self>, border_core::record::Record)
+        where
+            Self: Sized,
+        {
+            unimplemented!();
+        }
+
+        fn step_with_reset(
+            &mut self,
+            _a: &Self::Act,
+        ) -> (border_core::Step<Self>, border_core::record::Record)
+        where
+            Self: Sized,
+        {
+            unimplemented!();
+        }
+    }
+
+    pub type Env = DummyEnv;
+    pub type Sac = Sac_<Env, Mlp, Mlp2, DummyReplayBuffer>;
+}
+
+use dummy::Sac;
+
+fn create_sac_config() -> SacConfig<mlp::Mlp, mlp::Mlp2> {
+    // Omit learning related parameters
+    let actor_config = ActorConfig::default()
+        .out_dim(DIM_ACT)
+        .pi_config(mlp::MlpConfig::new(DIM_OBS, vec![64, 64], DIM_ACT, false));
+    let critic_config = CriticConfig::default().q_config(mlp::MlpConfig::new(
+        DIM_OBS + DIM_ACT,
+        vec![64, 64],
+        1,
+        false,
+    ));
+    SacConfig::default()
+        .actor_config(actor_config)
+        .critic_config(critic_config)
+        .device(tch::Device::Cpu)
+}
+
+fn main() -> Result<()> {
+    let src_path = "./border/examples/gym/model/tch/sac_pendulum/best";
+    let dest_path = "./border/examples/gym/model/edge/sac_pendulum/best/mlp.bincode";
+
+    // Load Sac model
+    let sac = {
+        let config = create_sac_config();
+        let mut sac = Sac::build(config);
+        sac.load_params(src_path)?;
+        sac
+    };
+
+    // Create Mlp
+    let mlp = {
+        let vs = sac.get_policy_net().get_var_store();
+        let w_names = ["mlp.al0.weight", "mlp.al1.weight", "ml.weight"];
+        let b_names = ["mlp.al0.bias", "mlp.al1.bias", "ml.bias"];
+        Mlp::from_varstore(vs, &w_names, &b_names)
+    };
+
+    // Serialize to file
+    let encoded = bincode::serialize(&mlp)?;
+    let mut file = fs::OpenOptions::new()
+        .create(true)
+        .write(true)
+        .open(&dest_path)?;
+    file.write_all(&encoded)?;
+
+    Ok(())
+}
diff --git a/border/examples/gym/pendulum_edge.rs b/border/examples/gym/pendulum_edge.rs
new file mode 100644
index 00000000..e81b4f97
--- /dev/null
+++ b/border/examples/gym/pendulum_edge.rs
@@ -0,0 +1,190 @@
+use anyhow::Result;
+use border_core::{DefaultEvaluator, Evaluator as _};
+use border_policy_no_backend::{Mat, Mlp};
+use border_py_gym_env::{
+    ArrayObsFilter, ContinuousActFilter, GymActFilter, GymEnv, GymEnvConfig, GymObsFilter,
+};
+use clap::Parser;
+use ndarray::ArrayD;
+use std::fs;
+
+type PyObsDtype = f32;
+
+mod obs_act_types {
+    use super::*;
+
+    #[derive(Clone, Debug)]
+    /// Observation type.
diff --git a/border/examples/gym/pendulum_edge.rs b/border/examples/gym/pendulum_edge.rs
new file mode 100644
index 00000000..e81b4f97
--- /dev/null
+++ b/border/examples/gym/pendulum_edge.rs
@@ -0,0 +1,190 @@
+use anyhow::Result;
+use border_core::{DefaultEvaluator, Evaluator as _};
+use border_policy_no_backend::{Mat, Mlp};
+use border_py_gym_env::{
+    ArrayObsFilter, ContinuousActFilter, GymActFilter, GymEnv, GymEnvConfig, GymObsFilter,
+};
+use clap::Parser;
+use ndarray::ArrayD;
+use std::fs;
+
+type PyObsDtype = f32;
+
+mod obs_act_types {
+    use super::*;
+
+    #[derive(Clone, Debug)]
+    /// Observation type.
+    pub struct Obs(Mat);
+
+    impl border_core::Obs for Obs {
+        fn dummy(_n: usize) -> Self {
+            Self(Mat::empty())
+        }
+
+        fn len(&self) -> usize {
+            self.0.shape()[0] as _
+        }
+    }
+
+    impl From<ArrayD<f32>> for Obs {
+        fn from(obs: ArrayD<f32>) -> Self {
+            let obs = obs.t().to_owned();
+            let shape = obs.shape().iter().map(|e| *e as i32).collect();
+            let data = obs.into_raw_vec();
+            Self(Mat::new(data, shape))
+        }
+    }
+
+    impl From<Obs> for Mat {
+        fn from(obs: Obs) -> Mat {
+            obs.0
+        }
+    }
+
+    #[derive(Clone, Debug)]
+    pub struct Act(Mat);
+
+    impl border_core::Act for Act {}
+
+    impl From<Act> for ArrayD<f32> {
+        fn from(value: Act) -> Self {
+            // let shape: Vec<_> = value.0.shape.iter().map(|e| *e as usize).collect();
+            let shape = vec![(value.0.shape[0] * value.0.shape[1]) as usize];
+            // let data = value.0.data;
+            let data: Vec<f32> = value.0.data.iter().map(|e| 2f32 * *e).collect();
+            let t = ArrayD::from_shape_vec(shape, data).unwrap();
+            t
+        }
+    }
+
+    impl Into<Act> for Mat {
+        fn into(self) -> Act {
+            Act(self)
+        }
+    }
+}
+
+mod policy {
+    use std::{io::Read, path::Path};
+
+    use super::*;
+    use border_core::Policy;
+
+    pub struct MlpPolicy {
+        mlp: Mlp,
+    }
+
+    impl Policy<Env> for MlpPolicy {
+        fn sample(&mut self, obs: &Obs) -> Act {
+            self.mlp.forward(&obs.clone().into()).into()
+        }
+    }
+
+    impl MlpPolicy {
+        pub fn from_serialized_path(path: impl AsRef<Path>) -> Result<Self> {
+            let mut file = fs::OpenOptions::new().read(true).open(&path)?;
+            let mut buf = Vec::<u8>::new();
+            let _ = file.read_to_end(&mut buf).unwrap();
+            let mlp: Mlp = bincode::deserialize(&buf[..])?;
+            Ok(Self { mlp })
+        }
+    }
+}
+
+use obs_act_types::*;
+use policy::*;
+
+type ObsFilter = ArrayObsFilter<PyObsDtype, f32, Obs>;
+type ActFilter = ContinuousActFilter<Act>;
+type Env = GymEnv<Obs, Act, ObsFilter, ActFilter>;
+type Evaluator = DefaultEvaluator<Env, MlpPolicy>;
+
+fn env_config() -> GymEnvConfig<Obs, Act, ObsFilter, ActFilter> {
+    GymEnvConfig::<Obs, Act, ObsFilter, ActFilter>::default()
+        .name("Pendulum-v1".to_string())
+        .obs_filter_config(ObsFilter::default_config())
+        .act_filter_config(ActFilter::default_config())
+}
+
+fn eval(n_episodes: usize, render: bool) -> Result<()> {
+    let env_config = {
+        let mut env_config = env_config();
+        if render {
+            env_config = env_config
+                .render_mode(Some("human".to_string()))
+                .set_wait_in_millis(10);
+        };
+        env_config
+    };
+    let mut policy = MlpPolicy::from_serialized_path(
+        "./border/examples/gym/model/edge/sac_pendulum/best/mlp.bincode",
+    )?;
+
+    let _ = Evaluator::new(&env_config, 0, n_episodes)?.evaluate(&mut policy);
+
+    Ok(())
+}
+
+/// Train/eval SAC agent in pendulum environment
+#[derive(Parser, Debug)]
+#[command(version, about)]
+struct Args {
+    /// Train SAC agent, not evaluate
+    #[arg(short, long, default_value_t = false)]
+    train: bool,
+
+    /// Evaluate SAC agent, not train
+    #[arg(short, long, default_value_t = false)]
+    eval: bool,
+
+    /// Log metrics with MLflow
+    #[arg(short, long, default_value_t = false)]
+    mlflow: bool,
+}
+
+fn main() -> Result<()> {
+    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
+    let _ = eval(5, true)?;
+
+    // let args = Args::parse();
+
+    // if args.train {
+    //     train(
+    //         MAX_OPTS,
+    //         "./border/examples/gym/model/tch/sac_pendulum",
+    //         EVAL_INTERVAL,
+    //         args.mlflow,
+    //     )?;
+    // } else if args.eval {
+    //     eval(5, true, "./border/examples/gym/model/tch/sac_pendulum/best")?;
+    // } else {
+    //     train(
+    //         MAX_OPTS,
+    //         "./border/examples/gym/model/tch/sac_pendulum",
+    //         EVAL_INTERVAL,
+    //         args.mlflow,
+    //     )?;
+    //     eval(5, true, "./border/examples/gym/model/tch/sac_pendulum/best")?;
+    // }
+
+    Ok(())
+}
+
+// #[cfg(test)]
+// mod test {
+//     use super::*;
+//     use tempdir::TempDir;
+
+//     #[test]
+//     fn test_sac_pendulum() -> Result<()> {
+//         tch::manual_seed(42);
+
+//         let model_dir = TempDir::new("sac_pendulum_tch")?;
+//         let model_dir = model_dir.path().to_str().unwrap();
+//         train(100, model_dir, 100, false)?;
+//         eval(1, false, (model_dir.to_string() + "/best").as_str())?;
+
+//         Ok(())
+//     }
+// }
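Two conventions in `pendulum_edge.rs` are worth spelling out: observations are transposed into a column-shaped `Mat` (features along the first axis) before the forward pass, and `From<Act> for ArrayD<f32>` multiplies the policy output by 2, mapping the squashed action from [-1, 1] onto Pendulum-v1's torque range of [-2, 2]. Since the deserialized policy depends on neither tch nor Python, it can also be queried with no gym environment at all, which is the point of the edge workflow. A minimal sketch (hypothetical, not part of this patch; it reuses only the `Mat`/`Mlp` calls appearing above):

```rust
use border_policy_no_backend::{Mat, Mlp};

fn main() -> anyhow::Result<()> {
    // Read back the policy written by the convert_sac_policy_to_edge example.
    let buf = std::fs::read("./border/examples/gym/model/edge/sac_pendulum/best/mlp.bincode")?;
    let mlp: Mlp = bincode::deserialize(&buf)?;

    // One Pendulum-v1 observation (cos theta, sin theta, theta_dot),
    // packed as a 3x1 column like the From<ArrayD<f32>> impl above.
    let obs = Mat::new(vec![1.0, 0.0, 0.0], vec![3, 1]);

    // Forward pass; the squashed output still needs the 2x rescaling
    // before it could be fed to the environment as a torque.
    let act = mlp.forward(&obs);
    println!("action shape (pre-scaling): {:?}", act.shape());
    Ok(())
}
```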
diff --git a/docker/aarch64_doc/doc.sh b/docker/aarch64_doc/doc.sh
index 4cf37c27..c823279f 100644
--- a/docker/aarch64_doc/doc.sh
+++ b/docker/aarch64_doc/doc.sh
@@ -3,4 +3,4 @@ docker run -it --rm \
    --shm-size=512m \
    --volume="$(pwd)/../..:/home/ubuntu/border" \
    border_headless bash -l -c \
-    "cd /home/ubuntu/border; CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
+    "cd /home/ubuntu/border; source /root/venv/bin/activate; LIBTORCH_USE_PYTORCH=1 LD_LIBRARY_PATH=$HOME/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
diff --git a/docker/aarch64_headless/Dockerfile b/docker/aarch64_headless/Dockerfile
index ace7c4a9..c3fc15b0 100644
--- a/docker/aarch64_headless/Dockerfile
+++ b/docker/aarch64_headless/Dockerfile
@@ -57,7 +57,7 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN cd /root && python3 -m venv venv
RUN source /root/venv/bin/activate && pip3 install --upgrade pip
RUN source /root/venv/bin/activate && pip3 install pyyaml typing-extensions
-RUN source /root/venv/bin/activate && pip3 install torch==1.12.0
+RUN source /root/venv/bin/activate && pip3 install torch==2.3.0
RUN source /root/venv/bin/activate && pip3 install ipython jupyterlab
RUN source /root/venv/bin/activate && pip3 install numpy==1.21.3
RUN source /root/venv/bin/activate && pip3 install mujoco==2.3.7
diff --git a/docker/amd64/Dockerfile b/docker/amd64/Dockerfile
index 744586a7..fdbecf3b 100644
--- a/docker/amd64/Dockerfile
+++ b/docker/amd64/Dockerfile
@@ -73,7 +73,7 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN cd /root && python3 -m venv venv
RUN source /root/venv/bin/activate && pip3 install --upgrade pip
RUN source /root/venv/bin/activate && pip3 install pyyaml typing-extensions
-RUN source /root/venv/bin/activate && pip3 install torch==1.13.1
+RUN source /root/venv/bin/activate && pip3 install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu --timeout 300
RUN source /root/venv/bin/activate && pip3 install ipython jupyterlab
RUN source /root/venv/bin/activate && pip3 install numpy==1.21.3
RUN source /root/venv/bin/activate && pip3 install mujoco==2.3.7
@@ -95,12 +95,6 @@ RUN cd $HOME && mkdir -p .border/model
# # PyBulletGym
# RUN source /home/ubuntu/venv/bin/activate && pip3 install pybullet==3.2.5
# RUN source /home/ubuntu/venv/bin/activate && pip3 install pybullet==2.7.1
-# RUN source /home/ubuntu/venv/bin/activate && \
-#     cd $HOME && \
-#     git clone https://github.com/bulletphysics/bullet3.git && \
-#     cd bullet3 && \
-#     git checkout -b tmp 2c204c49e56ed15ec5fcfa71d199ab6d6570b3f5 && \
-#     ./build_cmake_pybullet_double.sh
# RUN cd $HOME && \
#     git clone https://github.com/benelot/pybullet-gym.git && \
#     cd pybullet-gym && \
@@ -121,7 +115,7 @@ RUN echo 'export CARGO_TARGET_DIR=$HOME/target' >> ~/.bashrc
RUN echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin' >> ~/.bashrc
RUN echo 'export MUJOCO_GL=glfw' >> ~/.bashrc
RUN echo 'source $HOME/venv/bin/activate' >> ~/.bashrc
-RUN echo 'export RUSTFLAGS="-C target-feature=+fp16"' >> ~/.bashrc
+# RUN echo 'export RUSTFLAGS="-C target-feature=+fp16"' >> ~/.bashrc

RUN rm /bin/sh && mv /bin/sh_tmp /bin/sh
diff --git a/docker/amd64/build.sh b/docker/amd64/build.sh
index 0eb76e0e..0936264c 100644
--- a/docker/amd64/build.sh
+++ b/docker/amd64/build.sh
@@ -1,2 +1,3 @@
#!/bin/bash
docker build -t border .
+#podman build -t border .
diff --git a/docker/amd64/remove.sh b/docker/amd64/remove.sh
index e7a325bc..3872196d 100644
--- a/docker/amd64/remove.sh
+++ b/docker/amd64/remove.sh
@@ -1 +1,2 @@
docker rm -f border
+#podman rm -f border
diff --git a/docker/amd64/run.sh b/docker/amd64/run.sh
index c02762ca..be2f43f3 100644
--- a/docker/amd64/run.sh
+++ b/docker/amd64/run.sh
@@ -1,13 +1,13 @@
#!/bin/bash
-# nvidia-docker run -it --rm \
-#     --env="DISPLAY" \
-#     --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
-#     --volume="/home/taku-y:/home/taku-y" \
-#     --name my_pybullet my_pybullet bash
+nvidia-docker run -it --rm \
+    --env="DISPLAY" \
+    --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
+    --volume="/home/taku-y:/home/taku-y" \
+    --name my_pybullet my_pybullet bash

-docker run -td \
-    --name border \
-    -p 6080:6080 \
-    --shm-size=512m \
-    --volume="$(pwd)/../..:/root/border" \
-    border
+# podman run -td \
+#     --name border \
+#     -p 6080:6080 \
+#     --shm-size=512m \
+#     --volume="$(pwd)/../..:/root/border" \
+#     border
diff --git a/docker/amd64_headless/Dockerfile b/docker/amd64_headless/Dockerfile
index d460674d..c61e3ea0 100644
--- a/docker/amd64_headless/Dockerfile
+++ b/docker/amd64_headless/Dockerfile
@@ -1,13 +1,16 @@
-FROM ubuntu:focal-20221130
+FROM --platform=linux/amd64 ubuntu:22.04

ENV DEBIAN_FRONTEND noninteractive
RUN echo "Set disable_coredump false" >> /etc/sudo.conf
RUN apt-get update -q && \
    apt-get upgrade -yq && \
-    apt-get install -yq wget curl git build-essential vim sudo libssl-dev
-
-# lsb-release locales bash-completion tzdata gosu && \
-# RUN rm -rf /var/lib/apt/lists/*
+    apt-get install -yq wget
+RUN apt-get install -yq curl
+RUN apt-get install -yq git
+RUN apt-get install -yq build-essential
+RUN apt-get install -yq vim
+# RUN apt-get install -yq sudo
+RUN apt-get install -yq libssl-dev

# clang
RUN apt install -y -q libclang-dev
@@ -18,7 +21,7 @@ RUN apt update -y && \
    DEBIAN_FRONTEND=noninteractive && \
    apt install -y -q --no-install-recommends \
    libsdl2-dev libsdl2-image-dev libsdl2-mixer-dev libsdl2-net-dev libsdl2-ttf-dev \
-    libsdl-dev libsdl-image1.2-dev
+    libsdl-image1.2-dev libsdl1.2-dev

# zip
RUN apt install -y zip
@@ -27,7 +30,7 @@ RUN apt install -y zip
RUN apt install -y swig

# python
-RUN apt install -y python3.8 python3.8-dev python3.8-distutils python3.8-venv python3-pip
+RUN apt install -y python3.10 python3.10-dev python3.10-distutils python3.10-venv python3-pip

# cmake
RUN apt install -y cmake
@@ -44,32 +47,25 @@ RUN apt install -y patchelf libglfw3 libglfw3-dev
# Cleanup
RUN rm -rf /var/lib/apt/lists/*

-# COPY test_mujoco_py.py /test_mujoco_py.py
-# RUN chmod 777 /test_mujoco_py.py
-
-# Add user
-RUN useradd --create-home --home-dir /home/ubuntu --shell /bin/bash --user-group --groups adm,sudo ubuntu && \
-    echo ubuntu:ubuntu | chpasswd && \
-    echo "ubuntu ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
-
# Use bash
RUN mv /bin/sh /bin/sh_tmp && ln -s /bin/bash /bin/sh

-# User settings
-USER ubuntu
-
# rustup
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# python
-RUN cd /home/ubuntu && python3 -m venv venv
-RUN source /home/ubuntu/venv/bin/activate && pip3 install --upgrade pip
-RUN source /home/ubuntu/venv/bin/activate && pip3 install pyyaml typing-extensions
-RUN source /home/ubuntu/venv/bin/activate && pip3 install torch==1.12.0
-RUN source /home/ubuntu/venv/bin/activate && pip3 install ipython jupyterlab
-RUN source /home/ubuntu/venv/bin/activate && pip3 install numpy==1.21.3
-RUN source /home/ubuntu/venv/bin/activate && pip3 install gymnasium[box2d]==0.29.0
-RUN source /home/ubuntu/venv/bin/activate && pip3 install gymnasium-robotics==1.2.2
+RUN cd /root && python3 -m venv venv
+RUN source /root/venv/bin/activate && pip3 install --upgrade pip
+RUN source /root/venv/bin/activate && pip3 install pyyaml typing-extensions
+RUN source /root/venv/bin/activate && pip3 install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu --timeout 300
+RUN source /root/venv/bin/activate && pip3 install ipython jupyterlab
+RUN source /root/venv/bin/activate && pip3 install numpy==1.21.3
+RUN source /root/venv/bin/activate && pip3 install mujoco==2.3.7
+RUN source /root/venv/bin/activate && pip3 install gymnasium[box2d]==0.29.0
+RUN source /root/venv/bin/activate && pip3 install gymnasium-robotics==1.2.2
+RUN source /root/venv/bin/activate && pip3 install tensorboard==2.16.2
+RUN source /root/venv/bin/activate && pip3 install tabulate==0.9.0
+RUN source /root/venv/bin/activate && pip3 install mlflow-export-import==1.2.0
# RUN source /home/ubuntu/venv/bin/activate && pip3 install robosuite==1.3.2
# RUN source /home/ubuntu/venv/bin/activate && pip3 install -U 'mujoco-py<2.2,>=2.1'
# RUN source /home/ubuntu/venv/bin/activate && pip3 install dm-control==1.0.8
@@ -79,20 +75,6 @@ RUN source /home/ubuntu/venv/bin/activate && pip3 install gymnasium-robotics==1.2.2
# border
RUN cd $HOME && mkdir -p .border/model

-# Mujoco amd64 binary
-RUN cd $HOME && \
-    mkdir .mujoco && \
-    cd .mujoco && \
-    wget https://github.com/deepmind/mujoco/releases/download/2.1.1/mujoco-2.1.1-linux-x86_64.tar.gz
-RUN cd $HOME/.mujoco && \
-    tar zxf mujoco-2.1.1-linux-x86_64.tar.gz && \
-    mkdir -p mujoco210/bin && \
-    ln -sf $PWD/mujoco-2.1.1/lib/libmujoco.so.2.1.1 $PWD/mujoco210/bin/libmujoco210.so && \
-    ln -sf $PWD/mujoco-2.1.1/lib/libglewosmesa.so $PWD/mujoco210/bin/libglewosmesa.so && \
-    ln -sf $PWD/mujoco-2.1.1/include/ $PWD/mujoco210/include && \
-    ln -sf $PWD/mujoco-2.1.1/model/ $PWD/mujoco210/model
-# RUN cp /*.py $HOME
-
# # PyBulletGym
# RUN source /home/ubuntu/venv/bin/activate && pip3 install pybullet==3.2.5
# # RUN source /home/ubuntu/venv/bin/activate && pip3 install pybullet==2.7.1
@@ -106,22 +88,22 @@
# RUN sed -i 's/return state, sum(self.rewards), bool(done), {}/return state, sum(self.rewards), bool(done), bool(done), {}/g' /home/ubuntu/pybullet-gym/pybulletgym/envs/roboschool/envs/locomotion/walker_base_env.py
# RUN sed -i 's/id='\''AntPyBulletEnv-v0'\'',/id='\''AntPyBulletEnv-v0'\'', order_enforce=False,/g' /home/ubuntu/pybullet-gym/pybulletgym/envs/__init__.py

-# Env vars
-# RUN echo 'export LIBTORCH=$HOME/.local/lib/python3.8/site-packages/torch' >> ~/.bashrc
-# RUN echo 'export LD_LIBRARY_PATH=$LIBTORCH/lib' >> ~/.bashrc
-# RUN echo 'export PYTHONPATH=$HOME/border/border-py-gym-env/examples:$PYTHONPATH' >> ~/.bashrc
-# RUN echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin' >> ~/.bashrc
+# .bashrc
+RUN echo 'export LIBTORCH=$HOME/venv/lib/python3.10/site-packages/torch' >> ~/.bashrc
+RUN echo 'export LD_LIBRARY_PATH=$LIBTORCH/lib' >> ~/.bashrc
+RUN echo 'export LIBTORCH_CXX11_ABI=0' >> ~/.bashrc
RUN echo 'export PATH=$HOME/.local/bin:$PATH' >> ~/.bashrc
-ENV LIBTORCH_CXX11_ABI 0
-ENV LIBTORCH /home/ubuntu/venv/lib/python3.8/site-packages/torch
-ENV LD_LIBRARY_PATH $LIBTORCH/lib
-ENV PYTHONPATH /home/ubuntu/border/border-py-gym-env/examples:$PYTHONPATH
+RUN echo 'export PYTHONPATH=$HOME/border/border-py-gym-env/examples:$PYTHONPATH' >> ~/.bashrc
+RUN echo 'export CARGO_TARGET_DIR=$HOME/target' >> ~/.bashrc
+RUN echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin' >> ~/.bashrc
+RUN echo 'export MUJOCO_GL=glfw' >> ~/.bashrc
+RUN echo 'source $HOME/venv/bin/activate' >> ~/.bashrc
+RUN echo 'export RUSTFLAGS="-C target-feature=+fp16"' >> ~/.bashrc

-USER root
RUN rm /bin/sh && mv /bin/sh_tmp /bin/sh

-USER ubuntu
-WORKDIR /home/ubuntu/border
+# USER root
+# WORKDIR /home/ubuntu/border
# ENV USER ubuntu
# CMD ["/bin/bash", "-l", "-c"]
diff --git a/docker/amd64_headless/build.sh b/docker/amd64_headless/build.sh
index 860261b3..86de24a2 100644
--- a/docker/amd64_headless/build.sh
+++ b/docker/amd64_headless/build.sh
@@ -1,2 +1,3 @@
#!/bin/bash
docker build -t border_headless .
+#podman build -t border_headless .
diff --git a/docker/amd64_headless/doc.sh b/docker/amd64_headless/doc.sh
index ac4d6098..ec32f92f 100644
--- a/docker/amd64_headless/doc.sh
+++ b/docker/amd64_headless/doc.sh
@@ -3,4 +3,11 @@ docker run -it --rm \
    --shm-size=512m \
    --volume="$(pwd)/../..:/home/ubuntu/border" \
    border_headless bash -l -c \
-    "CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
+    "cd /home/ubuntu/border; source /root/venv/bin/activate; LIBTORCH_USE_PYTORCH=1 CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
+
+# podman run -it --rm \
+#     --name border_headless \
+#     --shm-size=512m \
+#     --volume="$(pwd)/../..:/home/ubuntu/border" \
+#     border_headless bash -l -c \
+#     "cd /home/ubuntu/border; source /root/venv/bin/activate; LIBTORCH_USE_PYTORCH=1 CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
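Context for the container changes above: this release bumps tch to 0.16.0, which expects libtorch 2.3, so every image now installs `torch==2.3.0` (the amd64 variants from the CPU wheel index with a longer timeout). The doc builds additionally set `LIBTORCH_USE_PYTORCH=1`, which tells tch-rs's build script to reuse the libtorch bundled with the venv's PyTorch wheel instead of a standalone download, and `LIBTORCH_CXX11_ABI=0` matches that wheel's ABI. The podman command variants are kept commented out next to their docker counterparts.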