Merge pull request #99 from taku-y/edge
Add crate border-policy-no-backend
taku-y committed Aug 6, 2024
2 parents 443b783 + 1dc32a0 commit 6347b6c
Showing 81 changed files with 2,085 additions and 879 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
@@ -101,3 +101,9 @@ jobs:
cargo test --example sac_pendulum_tch --features=tch
cargo test --example dqn_cartpole --features=candle-core
cargo test --example sac_pendulum --features=candle-core
cd border-async-trainer; cargo test; cd ..
cd border-atari-env; cargo test; cd ..
cd border-candle-agent; cargo test; cd ..
cd border-tch-agent; cargo test; cd ..
cd border-policy-no-backend; cargo test --features=border-tch-agent; cd ..
cd border-py-gym-env; cargo test; cd ..
30 changes: 16 additions & 14 deletions CHANGELOG.md
@@ -4,33 +4,35 @@

### Added

* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2)
* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1)
* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12)
* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2).
* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1).
* Add `Trainer::train_offline()` method for offline training (`border-core`) (https://github.com/taku-y/border/issues/18).
* Add crate `border-policy-no-backend`.

### Changed

* Take `self` in the signature of `push()` method of replay buffer (`border-core`)
* Fix a bug in `MlpConfig` (`border-tch-agent`)
* Bump the version of tch to 0.16.0 (`border-tch-agent`)
* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`)
* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10)
* Take `self` in the signature of `push()` method of replay buffer (`border-core`).
* Fix a bug in `MlpConfig` (`border-tch-agent`).
* Bump the version of tch to 0.16.0 (`border-tch-agent`).
* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`).
* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10).
* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12).

## v0.0.6 (2023-09-19)

### Added

* Docker files (`border`).
* Singularity files (`border`)
* Script for GPUSOROBAN (#67)
* Singularity files (`border`).
* Script for GPUSOROBAN (#67).
* `Evaluator` trait in `border-core` (#70). It can be used to customize evaluation logic in `Trainer`.
* Example of asynchronous trainer for native Atari environment and DQN (`border/examples`).
* Move tensorboard recorder into a separate crate (`border-tensorboard`)
* Move tensorboard recorder into a separate crate (`border-tensorboard`).

### Changed

* Bump the version of tch-rs to 0.8.0 (`border-tch-agent`).
* Rename agents as following the convention in Rust (`border-tch-agent`).
* Bump the version of gym to 0.26 (`border-py-gym-env`)
* Remove the type parameter for array shape of gym environments (`border-py-gym-env`)
* Interface of Python-Gym interface (`border-py-gym-env`)
* Bump the version of gym to 0.26 (`border-py-gym-env`).
* Remove the type parameter for array shape of gym environments (`border-py-gym-env`).
* Interface of Python-Gym interface (`border-py-gym-env`).
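The "split policy trait" entry in the Changed list above separates sampling from construction. The sketch below illustrates that kind of split in a self-contained way; the trait shapes are simplified stand-ins for illustration only, not `border-core`'s actual definitions:

```rust
// Simplified stand-ins for the two responsibilities that were separated:
// sampling actions (Policy) and building a policy from a configuration (Configurable).
trait Policy<Obs, Act> {
    fn sample(&mut self, obs: &Obs) -> Act;
}

trait Configurable {
    type Config;
    fn build(config: Self::Config) -> Self;
}

// A toy policy that ignores the observation and returns a fixed action.
struct ConstantPolicy {
    action: f32,
}

impl Policy<f32, f32> for ConstantPolicy {
    fn sample(&mut self, _obs: &f32) -> f32 {
        self.action
    }
}

impl Configurable for ConstantPolicy {
    type Config = f32;
    fn build(config: Self::Config) -> Self {
        Self { action: config }
    }
}

fn main() {
    // Construction goes through Configurable, sampling through Policy.
    let mut policy = ConstantPolicy::build(0.5);
    println!("action = {}", policy.sample(&1.0));
}
```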
6 changes: 4 additions & 2 deletions Cargo.toml
@@ -9,6 +9,7 @@ members = [
"border-derive",
"border-atari-env",
"border-async-trainer",
"border-policy-no-backend",
"border",
]
exclude = ["docker/"]
@@ -49,9 +50,10 @@ segment-tree = "2.0.0"
image = "0.23.14"
candle-core = { version = "=0.4.1", feature = ["cuda", "cudnn"] }
candle-nn = "0.4.1"
rand = "0.8.5"
rand = { version = "0.8.5", features = ["small_rng"] }
itertools = "0.12.1"
ordered-float = "4.2.0"
reqwest = { version = "0.11.26", features = ["json", "blocking"] }
xxhash-rust = { version = "0.8.10", features = ["xxh3"] }
candle-optimisers = "0.4.0"
candle-optimisers = "0.4.0"
bincode = "1.3.3"
14 changes: 9 additions & 5 deletions border-async-trainer/src/actor/base.rs
@@ -10,7 +10,7 @@ use std::{
};

#[cfg_attr(doc, aquamarine::aquamarine)]
/// Runs interaction between an [`Agent`] and an [`Env`], then generates transitions.
/// Generates transitions by running an [`Agent`] in an [`Env`].
///
/// ```mermaid
/// flowchart TB
@@ -23,15 +23,18 @@
/// C-->|ReplayBufferBase::PushedItem|F[ReplayBufferProxy]
/// ```
///
/// This diagram shows interaction of [`Agent`], [`Env`] and [`StepProcessor`],
/// as shown in [`border_core::Trainer`]. However, this diagram also shows that
/// In [`Actor`], an [`Agent`] runs on an [`Env`] and generates [`Step`] objects.
/// These objects are processed with [`StepProcessor`] and sent to [`ReplayBufferProxy`].
/// The [`Agent`] in the [`Actor`] periodically synchronizes with the [`Agent`] in
/// [`AsyncTrainer`] via [`SyncModel::ModelInfo`], and the transitions generated by
/// [`StepProcessor`] are sent to the [`ReplayBufferProxy`].
/// [`AsyncTrainer`] via [`SyncModel::ModelInfo`].
///
/// See also the diagram in [`AsyncTrainer`].
///
/// [`AsyncTrainer`]: crate::AsyncTrainer
/// [`Agent`]: border_core::Agent
/// [`Env`]: border_core::Env
/// [`StepProcessor`]: border_core::StepProcessor
/// [`Step`]: border_core::Step
pub struct Actor<A, E, P, R>
where
A: Agent<E, R> + Configurable<E> + SyncModel,
@@ -70,6 +73,7 @@ where
env_seed: i64,
stats: Arc<Mutex<Option<ActorStat>>>,
) -> Self {
log::info!("Create actor {}", id);
Self {
id,
stop,
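The rewritten doc comment above says the `Agent` inside an `Actor` periodically synchronizes with the trainer's `Agent` through `SyncModel::ModelInfo`. Below is a minimal single-file sketch of that version-checked pull; all names and signatures are illustrative assumptions, not the crate's actual API:

```rust
use std::sync::{Arc, Mutex};

// A versioned snapshot of model parameters, analogous in spirit to SyncModel::ModelInfo.
struct ModelInfo {
    version: usize,
    weights: Vec<f32>,
}

// A toy agent holding a local copy of the parameters and the version it last synced to.
struct ToyAgent {
    weights: Vec<f32>,
    synced_version: usize,
}

impl ToyAgent {
    // Pull newer parameters from the shared snapshot if the trainer has published any.
    fn sync(&mut self, shared: &Mutex<ModelInfo>) {
        let info = shared.lock().unwrap();
        if info.version > self.synced_version {
            self.weights = info.weights.clone();
            self.synced_version = info.version;
        }
    }
}

fn main() {
    let shared = Arc::new(Mutex::new(ModelInfo { version: 0, weights: vec![0.0; 4] }));
    let mut agent = ToyAgent { weights: vec![], synced_version: 0 };

    // The "trainer" publishes an updated snapshot...
    *shared.lock().unwrap() = ModelInfo { version: 1, weights: vec![0.1, 0.2, 0.3, 0.4] };

    // ...and the "actor" picks it up at the start of its next sampling loop.
    agent.sync(&shared);
    println!("synced to version {}: {:?}", agent.synced_version, agent.weights);
}
```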
4 changes: 2 additions & 2 deletions border-async-trainer/src/actor/stat.rs
@@ -1,12 +1,12 @@
use std::time::Duration;

/// Stats of sampling process in each [`Actor`](crate::Actor).
/// Stats of sampling process in an [`Actor`](crate::Actor).
#[derive(Clone, Debug)]
pub struct ActorStat {
/// The number of steps for interaction between agent and env.
pub env_steps: usize,

/// Duration of sampling loop in [`Actor`](crate::Actor).
/// Duration of sampling loop in the [`Actor`](crate::Actor).
pub duration: Duration,
}

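Because `ActorStat` exposes `env_steps` and `duration` as public fields, sampling throughput can be derived directly from a stat value. A small sketch; the mirrored struct and helper below are ours, purely for illustration, and real code would use `border_async_trainer::ActorStat`:

```rust
use std::time::Duration;

// Mirrors the two public fields shown above; stands in for border_async_trainer::ActorStat.
struct ActorStat {
    env_steps: usize,
    duration: Duration,
}

// Environment steps per second over the actor's sampling loop.
fn steps_per_sec(stat: &ActorStat) -> f32 {
    stat.env_steps as f32 / stat.duration.as_secs_f32()
}

fn main() {
    let stat = ActorStat { env_steps: 12_000, duration: Duration::from_secs(60) };
    println!("{:.1} env steps/sec", steps_per_sec(&stat));
}
```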
2 changes: 1 addition & 1 deletion border-async-trainer/src/actor_manager.rs
@@ -1,4 +1,4 @@
//! A manager of [Actor]()s.
//! A manager of [`Actor`](crate::Actor)s.
mod base;
mod config;
pub use base::ActorManager;
25 changes: 14 additions & 11 deletions border-async-trainer/src/actor_manager/base.rs
@@ -15,20 +15,22 @@ use std::{
/// Manages [`Actor`]s.
///
/// This struct handles the following requests:
/// * From the [LearnerManager]() for updating the latest model info, stored in this struct.
/// * From the [`AsyncTrainer`] for updating the latest model info, stored in this struct.
/// * From the [`Actor`]s for getting the latest model info.
/// * From the [`Actor`]s for pushing sample batch to the `LearnerManager`.
///
/// [`AsyncTrainer`]: crate::AsyncTrainer
pub struct ActorManager<A, E, R, P>
where
A: Agent<E, R> + Configurable<E> + SyncModel,
E: Env,
P: StepProcessor<E>,
R: ExperienceBufferBase<Item = P::Output> + ReplayBufferBase,
{
/// Configurations of [Agent]s.
/// Configurations of [`Agent`]s.
agent_configs: Vec<A::Config>,

/// Configuration of [Env].
/// Configuration of [`Env`].
env_config: E::Config,

/// Configuration of a `StepProcessor`.
@@ -77,7 +79,7 @@ where
R::Item: Send + 'static,
A::ModelInfo: Send + 'static,
{
/// Builds a [ActorManager].
/// Builds an [`ActorManager`].
pub fn build(
config: &ActorManagerConfig,
agent_configs: &Vec<A::Config>,
@@ -103,10 +105,10 @@
}
}

/// Runs threads for [Actor]s and a thread for sending samples into the replay buffer.
/// Runs threads for [`Actor`]s and a thread for sending samples into the replay buffer.
///
/// A thread will wait for the initial [SyncModel::ModelInfo] from [AsyncTrainer](crate::AsyncTrainer),
/// which blocks execution of [Actor] threads.
/// Each thread is blocked until receiving the initial [`SyncModel::ModelInfo`]
/// from [`AsyncTrainer`](crate::AsyncTrainer).
pub fn run(&mut self, guard_init_env: Arc<Mutex<bool>>) {
// Guard for sync of the initial model
let guard_init_model = Arc::new(Mutex::new(true));
@@ -220,10 +222,11 @@ where
// TODO: error handling, timeout
// TODO: caching
// TODO: stats
let msg = receiver.recv().unwrap();
_n_samples += 1;
sender.try_send(msg).unwrap();
// println!("{:?}", (_msg.id, n_samples));
let msg = receiver.recv();
if msg.is_ok() {
_n_samples += 1;
sender.try_send(msg.unwrap()).unwrap();
}

// Stop the loop
if *stop.lock().unwrap() {
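The final hunk above replaces an unconditional `unwrap()` on `recv()` with a check of the returned `Result`, so the forwarding thread no longer panics once every sender has disconnected. A standalone sketch of the same pattern with `std::sync::mpsc`; the crate itself may use a different channel type, and its loop exits via a `stop` flag rather than on `Err`:

```rust
use std::sync::mpsc::{channel, sync_channel};
use std::thread;

fn main() {
    // Upstream: actors push transitions; downstream: bounded channel toward the replay buffer.
    let (actor_tx, actor_rx) = channel::<u32>();
    let (buf_tx, buf_rx) = sync_channel::<u32>(16);

    let forwarder = thread::spawn(move || {
        let mut n_samples = 0usize;
        loop {
            // recv() returns Err once every sender has been dropped;
            // checking the result avoids the panic an unwrap() would cause.
            match actor_rx.recv() {
                Ok(msg) => {
                    n_samples += 1;
                    // try_send() fails if the bounded channel is full; ignored here for brevity.
                    let _ = buf_tx.try_send(msg);
                }
                Err(_) => break, // all actors have finished
            }
        }
        n_samples
    });

    for i in 0..8 {
        actor_tx.send(i).unwrap();
    }
    drop(actor_tx); // closing the channel lets the forwarder exit

    let forwarded = forwarder.join().unwrap();
    println!("forwarded {} messages; first queued item: {:?}", forwarded, buf_rx.recv().ok());
}
```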
23 changes: 10 additions & 13 deletions border-async-trainer/src/async_trainer/base.rs
@@ -33,25 +33,26 @@ use std::{
/// end
/// ```
///
/// * In [`ActorManager`] (right), [`Actor`]s sample transitions, which have type
/// [`ReplayBufferBase::Item`], in parallel and push the transitions into
/// [`ReplayBufferProxy`]. It should be noted that [`ReplayBufferProxy`] has a
/// type parameter of [`ReplayBufferBase`] and the proxy accepts
/// [`ReplayBufferBase::Item`].
/// * The proxy sends the transitions into the replay buffer, implementing
/// [`ReplayBufferBase`], in the [`AsyncTrainer`].
/// * The [`Agent`] in [`AsyncTrainer`] trains its model parameters by using batches
/// * The [`Agent`] in [`AsyncTrainer`] (left) is trained with batches
/// of type [`ReplayBufferBase::Batch`], which are taken from the replay buffer.
/// * The model parameters of the [`Agent`] in [`AsyncTrainer`] are wrapped in
/// [`SyncModel::ModelInfo`] and periodically sent to the [`Agent`]s in [`Actor`]s.
/// [`Agent`] must implement [`SyncModel`] to synchronize its model.
/// [`Agent`] must implement [`SyncModel`] to synchronize the model parameters.
/// * In [`ActorManager`] (right), [`Actor`]s sample transitions, which have type
/// [`ReplayBufferBase::Item`], and push the transitions into
/// [`ReplayBufferProxy`].
/// * [`ReplayBufferProxy`] has a type parameter of [`ReplayBufferBase`] and the proxy accepts
/// [`ReplayBufferBase::Item`].
/// * The proxy sends the transitions into the replay buffer in the [`AsyncTrainer`].
///
/// [`ActorManager`]: crate::ActorManager
/// [`Actor`]: crate::Actor
/// [`ReplayBufferBase::Item`]: border_core::ReplayBufferBase::PushedItem
/// [`ReplayBufferBase::Batch`]: border_core::ReplayBufferBase::PushedBatch
/// [`ReplayBufferProxy`]: crate::ReplayBufferProxy
/// [`ReplayBufferBase`]: border_core::ReplayBufferBase
/// [`SyncModel::ModelInfo`]: crate::SyncModel::ModelInfo
/// [`Agent`]: border_core::Agent
pub struct AsyncTrainer<A, E, R>
where
A: Agent<E, R> + Configurable<E> + SyncModel,
@@ -266,11 +267,8 @@ where
};
let mut agent = A::build(self.agent_config.clone());
let mut buffer = R::build(&self.replay_buffer_config);
// let buffer = Arc::new(Mutex::new(R::build(&self.replay_buffer_config)));
agent.train();

// self.run_replay_buffer_thread(buffer.clone());

let mut max_eval_reward = f32::MIN;
let mut opt_steps = 0;
let mut samples = 0;
@@ -294,7 +292,6 @@

// Add stats wrt computation cost
if opt_steps % self.record_compute_cost_interval == 0 {
// record.insert("fps", Scalar(sampler.fps()));
record.insert("opt_steps_per_sec", Scalar(self.opt_steps_per_sec()));
}

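The hunk above keeps the periodic `opt_steps_per_sec` record while dropping the commented-out `fps` line. A minimal sketch of that interval-gated bookkeeping, with a sleep standing in for an optimization step; the interval values are arbitrary, not the trainer's defaults:

```rust
use std::{thread, time::{Duration, Instant}};

fn main() {
    let record_interval = 100; // plays the role of record_compute_cost_interval
    let max_opts = 1_000;

    let mut last = Instant::now();
    let mut steps_since_last = 0u32;

    for opt_steps in 1..=max_opts {
        // Stand-in for one optimization step.
        thread::sleep(Duration::from_millis(1));
        steps_since_last += 1;

        // Only record the cost statistic every `record_interval` steps,
        // mirroring the modulo check in the hunk above.
        if opt_steps % record_interval == 0 {
            let opt_steps_per_sec = steps_since_last as f32 / last.elapsed().as_secs_f32();
            println!("opt_steps={opt_steps} opt_steps_per_sec={opt_steps_per_sec:.0}");
            last = Instant::now();
            steps_since_last = 0;
        }
    }
}
```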
18 changes: 17 additions & 1 deletion border-async-trainer/src/async_trainer/config.rs
@@ -6,7 +6,7 @@ use std::{
path::Path,
};

/// Configuration of [AsyncTrainer](crate::AsyncTrainer)
/// Configuration of [`AsyncTrainer`](crate::AsyncTrainer).
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AsyncTrainerConfig {
/// The maximum number of optimization steps.
@@ -56,3 +56,19 @@ impl AsyncTrainerConfig {
Ok(self)
}
}

impl Default for AsyncTrainerConfig {
/// There is no special intention behind these initial values.
fn default() -> Self {
Self {
max_opts: 10, //000,
model_dir: None,
eval_interval: 5000,
flush_record_interval: 5000,
record_compute_cost_interval: 5000,
save_interval: 50000,
sync_interval: 100,
warmup_period: 10000,
}
}
}
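With the new `Default` implementation, a configuration can start from these values and override only selected fields via struct-update syntax. A hedged usage sketch, assuming the fields are public and `AsyncTrainerConfig` is re-exported at the crate root:

```rust
use border_async_trainer::AsyncTrainerConfig;

fn main() {
    // Start from the defaults added above and override only the fields that differ.
    let config = AsyncTrainerConfig {
        max_opts: 100_000,
        sync_interval: 500,
        ..Default::default()
    };
    println!("{:?}", config);
}
```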