Merge pull request #99 from taku-y/edge
Add crate border-policy-no-backend
taku-y committed Aug 6, 2024
2 parents 443b783 + 1dc32a0 commit 6347b6c
Showing 81 changed files with 2,085 additions and 879 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
@@ -101,3 +101,9 @@ jobs:
cargo test --example sac_pendulum_tch --features=tch
cargo test --example dqn_cartpole --features=candle-core
cargo test --example sac_pendulum --features=candle-core
cd border-async-trainer; cargo test; cd ..
cd border-atari-env; cargo test; cd ..
cd border-candle-agent; cargo test; cd ..
cd border-tch-agent; cargo test; cd ..
cd border-policy-no-backend; cargo test --features=border-tch-agent; cd ..
cd border-py-gym-env; cargo test; cd ..
30 changes: 16 additions & 14 deletions CHANGELOG.md
@@ -4,33 +4,35 @@

### Added

* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2)
* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1)
* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12)
* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2).
* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1).
* Add `Trainer::train_offline()` method for offline training (`border-core`) (https://github.com/taku-y/border/issues/18).
* Add crate `border-policy-no-backend`.

### Changed

* Take `self` in the signature of `push()` method of replay buffer (`border-core`)
* Fix a bug in `MlpConfig` (`border-tch-agent`)
* Bump the version of tch to 0.16.0 (`border-tch-agent`)
* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`)
* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10)
* Take `self` in the signature of `push()` method of replay buffer (`border-core`).
* Fix a bug in `MlpConfig` (`border-tch-agent`).
* Bump the version of tch to 0.16.0 (`border-tch-agent`).
* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`).
* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10).
* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12).

## v0.0.6 (2023-09-19)

### Added

* Docker files (`border`).
* Singularity files (`border`)
* Script for GPUSOROBAN (#67)
* Singularity files (`border`).
* Script for GPUSOROBAN (#67).
* `Evaluator` trait in `border-core` (#70). It can be used to customize evaluation logic in `Trainer`.
* Example of asynchronous trainer for native Atari environment and DQN (`border/examples`).
* Move tensorboard recorder into a separate crate (`border-tensorboard`)
* Move tensorboard recorder into a separate crate (`border-tensorboard`).

### Changed

* Bump the version of tch-rs to 0.8.0 (`border-tch-agent`).
* Rename agents as following the convention in Rust (`border-tch-agent`).
* Bump the version of gym to 0.26 (`border-py-gym-env`)
* Remove the type parameter for array shape of gym environments (`border-py-gym-env`)
* Interface of Python-Gym interface (`border-py-gym-env`)
* Bump the version of gym to 0.26 (`border-py-gym-env`).
* Remove the type parameter for array shape of gym environments (`border-py-gym-env`).
* Interface of Python-Gym interface (`border-py-gym-env`).
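The "split policy trait" entry in the Changed list above separates sampling from construction. The sketch below illustrates that kind of split in a self-contained way; the trait shapes are simplified stand-ins for illustration only, not `border-core`'s actual definitions:

```rust
// Simplified stand-ins for the two responsibilities that were separated:
// sampling actions (Policy) and building a policy from a configuration (Configurable).
trait Policy<Obs, Act> {
    fn sample(&mut self, obs: &Obs) -> Act;
}

trait Configurable {
    type Config;
    fn build(config: Self::Config) -> Self;
}

// A toy policy that ignores the observation and returns a fixed action.
struct ConstantPolicy {
    action: f32,
}

impl Policy<f32, f32> for ConstantPolicy {
    fn sample(&mut self, _obs: &f32) -> f32 {
        self.action
    }
}

impl Configurable for ConstantPolicy {
    type Config = f32;
    fn build(config: Self::Config) -> Self {
        Self { action: config }
    }
}

fn main() {
    // Construction goes through Configurable, sampling through Policy.
    let mut policy = ConstantPolicy::build(0.5);
    println!("action = {}", policy.sample(&1.0));
}
```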
6 changes: 4 additions & 2 deletions Cargo.toml
@@ -9,6 +9,7 @@ members = [
"border-derive",
"border-atari-env",
"border-async-trainer",
"border-policy-no-backend",
"border",
]
exclude = ["docker/"]
@@ -49,9 +50,10 @@ segment-tree = "2.0.0"
image = "0.23.14"
candle-core = { version = "=0.4.1", feature = ["cuda", "cudnn"] }
candle-nn = "0.4.1"
rand = "0.8.5"
rand = { version = "0.8.5", features = ["small_rng"] }
itertools = "0.12.1"
ordered-float = "4.2.0"
reqwest = { version = "0.11.26", features = ["json", "blocking"] }
xxhash-rust = { version = "0.8.10", features = ["xxh3"] }
candle-optimisers = "0.4.0"
candle-optimisers = "0.4.0"
bincode = "1.3.3"
14 changes: 9 additions & 5 deletions border-async-trainer/src/actor/base.rs
@@ -10,7 +10,7 @@ use std::{
};

#[cfg_attr(doc, aquamarine::aquamarine)]
/// Runs interaction between an [`Agent`] and an [`Env`], then generates transitions.
/// Generates transitions by running an [`Agent`] in an [`Env`].
///
/// ```mermaid
/// flowchart TB
@@ -23,15 +23,18 @@
/// C-->|ReplayBufferBase::PushedItem|F[ReplayBufferProxy]
/// ```
///
/// This diagram shows interaction of [`Agent`], [`Env`] and [`StepProcessor`],
/// as shown in [`border_core::Trainer`]. However, this diagram also shows that
/// In [`Actor`], an [`Agent`] runs on an [`Env`] and generates [`Step`] objects.
/// These objects are processed with [`StepProcessor`] and sent to [`ReplayBufferProxy`].
/// The [`Agent`] in the [`Actor`] periodically synchronizes with the [`Agent`] in
/// [`AsyncTrainer`] via [`SyncModel::ModelInfo`], and the transitions generated by
/// [`StepProcessor`] are sent to the [`ReplayBufferProxy`].
/// [`AsyncTrainer`] via [`SyncModel::ModelInfo`].
///
/// See also the diagram in [`AsyncTrainer`].
///
/// [`AsyncTrainer`]: crate::AsyncTrainer
/// [`Agent`]: border_core::Agent
/// [`Env`]: border_core::Env
/// [`StepProcessor`]: border_core::StepProcessor
/// [`Step`]: border_core::Step
pub struct Actor<A, E, P, R>
where
A: Agent<E, R> + Configurable<E> + SyncModel,
@@ -70,6 +73,7 @@ where
env_seed: i64,
stats: Arc<Mutex<Option<ActorStat>>>,
) -> Self {
log::info!("Create actor {}", id);
Self {
id,
stop,
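The rewritten doc comment above says the `Agent` inside an `Actor` periodically synchronizes with the trainer's `Agent` through `SyncModel::ModelInfo`. Below is a minimal single-file sketch of that version-checked pull; all names and signatures are illustrative assumptions, not the crate's actual API:

```rust
use std::sync::{Arc, Mutex};

// A versioned snapshot of model parameters, analogous in spirit to SyncModel::ModelInfo.
struct ModelInfo {
    version: usize,
    weights: Vec<f32>,
}

// A toy agent holding a local copy of the parameters and the version it last synced to.
struct ToyAgent {
    weights: Vec<f32>,
    synced_version: usize,
}

impl ToyAgent {
    // Pull newer parameters from the shared snapshot if the trainer has published any.
    fn sync(&mut self, shared: &Mutex<ModelInfo>) {
        let info = shared.lock().unwrap();
        if info.version > self.synced_version {
            self.weights = info.weights.clone();
            self.synced_version = info.version;
        }
    }
}

fn main() {
    let shared = Arc::new(Mutex::new(ModelInfo { version: 0, weights: vec![0.0; 4] }));
    let mut agent = ToyAgent { weights: vec![], synced_version: 0 };

    // The "trainer" publishes an updated snapshot...
    *shared.lock().unwrap() = ModelInfo { version: 1, weights: vec![0.1, 0.2, 0.3, 0.4] };

    // ...and the "actor" picks it up at the start of its next sampling loop.
    agent.sync(&shared);
    println!("synced to version {}: {:?}", agent.synced_version, agent.weights);
}
```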
4 changes: 2 additions & 2 deletions border-async-trainer/src/actor/stat.rs
@@ -1,12 +1,12 @@
use std::time::Duration;

/// Stats of sampling process in each [`Actor`](crate::Actor).
/// Stats of sampling process in an [`Actor`](crate::Actor).
#[derive(Clone, Debug)]
pub struct ActorStat {
/// The number of steps for interaction between agent and env.
pub env_steps: usize,

/// Duration of sampling loop in [`Actor`](crate::Actor).
/// Duration of sampling loop in the [`Actor`](crate::Actor).
pub duration: Duration,
}

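Because `ActorStat` exposes `env_steps` and `duration` as public fields, sampling throughput can be derived directly from a stat value. A small sketch; the mirrored struct and helper below are ours, purely for illustration, and real code would use `border_async_trainer::ActorStat`:

```rust
use std::time::Duration;

// Mirrors the two public fields shown above; stands in for border_async_trainer::ActorStat.
struct ActorStat {
    env_steps: usize,
    duration: Duration,
}

// Environment steps per second over the actor's sampling loop.
fn steps_per_sec(stat: &ActorStat) -> f32 {
    stat.env_steps as f32 / stat.duration.as_secs_f32()
}

fn main() {
    let stat = ActorStat { env_steps: 12_000, duration: Duration::from_secs(60) };
    println!("{:.1} env steps/sec", steps_per_sec(&stat));
}
```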
2 changes: 1 addition & 1 deletion border-async-trainer/src/actor_manager.rs
@@ -1,4 +1,4 @@
//! A manager of [Actor]()s.
//! A manager of [`Actor`](crate::Actor)s.
mod base;
mod config;
pub use base::ActorManager;
25 changes: 14 additions & 11 deletions border-async-trainer/src/actor_manager/base.rs
@@ -15,20 +15,22 @@ use std::{
/// Manages [`Actor`]s.
///
/// This struct handles the following requests:
/// * From the [LearnerManager]() for updating the latest model info, stored in this struct.
/// * From the [`AsyncTrainer`] for updating the latest model info, stored in this struct.
/// * From the [`Actor`]s for getting the latest model info.
/// * From the [`Actor`]s for pushing sample batch to the `LearnerManager`.
///
/// [`AsyncTrainer`]: crate::AsyncTrainer
pub struct ActorManager<A, E, R, P>
where
A: Agent<E, R> + Configurable<E> + SyncModel,
E: Env,
P: StepProcessor<E>,
R: ExperienceBufferBase<Item = P::Output> + ReplayBufferBase,
{
/// Configurations of [Agent]s.
/// Configurations of [`Agent`]s.
agent_configs: Vec<A::Config>,

/// Configuration of [Env].
/// Configuration of [`Env`].
env_config: E::Config,

/// Configuration of a `StepProcessor`.
@@ -77,7 +79,7 @@ where
R::Item: Send + 'static,
A::ModelInfo: Send + 'static,
{
/// Builds a [ActorManager].
/// Builds an [`ActorManager`].
pub fn build(
config: &ActorManagerConfig,
agent_configs: &Vec<A::Config>,
@@ -103,10 +105,10 @@
}
}

/// Runs threads for [Actor]s and a thread for sending samples into the replay buffer.
/// Runs threads for [`Actor`]s and a thread for sending samples into the replay buffer.
///
/// A thread will wait for the initial [SyncModel::ModelInfo] from [AsyncTrainer](crate::AsyncTrainer),
/// which blocks execution of [Actor] threads.
/// Each thread is blocked until receiving the initial [`SyncModel::ModelInfo`]
/// from [`AsyncTrainer`](crate::AsyncTrainer).
pub fn run(&mut self, guard_init_env: Arc<Mutex<bool>>) {
// Guard for sync of the initial model
let guard_init_model = Arc::new(Mutex::new(true));
@@ -220,10 +222,11 @@ where
// TODO: error handling, timeout
// TODO: caching
// TODO: stats
let msg = receiver.recv().unwrap();
_n_samples += 1;
sender.try_send(msg).unwrap();
// println!("{:?}", (_msg.id, n_samples));
let msg = receiver.recv();
if msg.is_ok() {
_n_samples += 1;
sender.try_send(msg.unwrap()).unwrap();
}

// Stop the loop
if *stop.lock().unwrap() {
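The final hunk above replaces an unconditional `unwrap()` on `recv()` with a check of the returned `Result`, so the forwarding thread no longer panics once every sender has disconnected. A standalone sketch of the same pattern with `std::sync::mpsc`; the crate itself may use a different channel type, and its loop exits via a `stop` flag rather than on `Err`:

```rust
use std::sync::mpsc::{channel, sync_channel};
use std::thread;

fn main() {
    // Upstream: actors push transitions; downstream: bounded channel toward the replay buffer.
    let (actor_tx, actor_rx) = channel::<u32>();
    let (buf_tx, buf_rx) = sync_channel::<u32>(16);

    let forwarder = thread::spawn(move || {
        let mut n_samples = 0usize;
        loop {
            // recv() returns Err once every sender has been dropped;
            // checking the result avoids the panic an unwrap() would cause.
            match actor_rx.recv() {
                Ok(msg) => {
                    n_samples += 1;
                    // try_send() fails if the bounded channel is full; ignored here for brevity.
                    let _ = buf_tx.try_send(msg);
                }
                Err(_) => break, // all actors have finished
            }
        }
        n_samples
    });

    for i in 0..8 {
        actor_tx.send(i).unwrap();
    }
    drop(actor_tx); // closing the channel lets the forwarder exit

    let forwarded = forwarder.join().unwrap();
    println!("forwarded {} messages; first queued item: {:?}", forwarded, buf_rx.recv().ok());
}
```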
23 changes: 10 additions & 13 deletions border-async-trainer/src/async_trainer/base.rs
@@ -33,25 +33,26 @@ use std::{
/// end
/// ```
///
/// * In [`ActorManager`] (right), [`Actor`]s sample transitions, which have type
/// [`ReplayBufferBase::Item`], in parallel and push the transitions into
/// [`ReplayBufferProxy`]. It should be noted that [`ReplayBufferProxy`] has a
/// type parameter of [`ReplayBufferBase`] and the proxy accepts
/// [`ReplayBufferBase::Item`].
/// * The proxy sends the transitions into the replay buffer, implementing
/// [`ReplayBufferBase`], in the [`AsyncTrainer`].
/// * The [`Agent`] in [`AsyncTrainer`] trains its model parameters by using batches
/// * The [`Agent`] in [`AsyncTrainer`] (left) is trained with batches
/// of type [`ReplayBufferBase::Batch`], which are taken from the replay buffer.
/// * The model parameters of the [`Agent`] in [`AsyncTrainer`] are wrapped in
/// [`SyncModel::ModelInfo`] and periodically sent to the [`Agent`]s in [`Actor`]s.
/// [`Agent`] must implement [`SyncModel`] to synchronize its model.
/// [`Agent`] must implement [`SyncModel`] to synchronize the model parameters.
/// * In [`ActorManager`] (right), [`Actor`]s sample transitions, which have type
/// [`ReplayBufferBase::Item`], and push the transitions into
/// [`ReplayBufferProxy`].
/// * [`ReplayBufferProxy`] has a type parameter of [`ReplayBufferBase`] and the proxy accepts
/// [`ReplayBufferBase::Item`].
/// * The proxy sends the transitions into the replay buffer in the [`AsyncTrainer`].
///
/// [`ActorManager`]: crate::ActorManager
/// [`Actor`]: crate::Actor
/// [`ReplayBufferBase::Item`]: border_core::ReplayBufferBase::PushedItem
/// [`ReplayBufferBase::Batch`]: border_core::ReplayBufferBase::PushedBatch
/// [`ReplayBufferProxy`]: crate::ReplayBufferProxy
/// [`ReplayBufferBase`]: border_core::ReplayBufferBase
/// [`SyncModel::ModelInfo`]: crate::SyncModel::ModelInfo
/// [`Agent`]: border_core::Agent
pub struct AsyncTrainer<A, E, R>
where
A: Agent<E, R> + Configurable<E> + SyncModel,
@@ -266,11 +267,8 @@ where
};
let mut agent = A::build(self.agent_config.clone());
let mut buffer = R::build(&self.replay_buffer_config);
// let buffer = Arc::new(Mutex::new(R::build(&self.replay_buffer_config)));
agent.train();

// self.run_replay_buffer_thread(buffer.clone());

let mut max_eval_reward = f32::MIN;
let mut opt_steps = 0;
let mut samples = 0;
@@ -294,7 +292,6 @@

// Add stats wrt computation cost
if opt_steps % self.record_compute_cost_interval == 0 {
// record.insert("fps", Scalar(sampler.fps()));
record.insert("opt_steps_per_sec", Scalar(self.opt_steps_per_sec()));
}

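The hunk above keeps the periodic `opt_steps_per_sec` record while dropping the commented-out `fps` line. A minimal sketch of that interval-gated bookkeeping, with a sleep standing in for an optimization step; the interval values are arbitrary, not the trainer's defaults:

```rust
use std::{thread, time::{Duration, Instant}};

fn main() {
    let record_interval = 100; // plays the role of record_compute_cost_interval
    let max_opts = 1_000;

    let mut last = Instant::now();
    let mut steps_since_last = 0u32;

    for opt_steps in 1..=max_opts {
        // Stand-in for one optimization step.
        thread::sleep(Duration::from_millis(1));
        steps_since_last += 1;

        // Only record the cost statistic every `record_interval` steps,
        // mirroring the modulo check in the hunk above.
        if opt_steps % record_interval == 0 {
            let opt_steps_per_sec = steps_since_last as f32 / last.elapsed().as_secs_f32();
            println!("opt_steps={opt_steps} opt_steps_per_sec={opt_steps_per_sec:.0}");
            last = Instant::now();
            steps_since_last = 0;
        }
    }
}
```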
18 changes: 17 additions & 1 deletion border-async-trainer/src/async_trainer/config.rs
@@ -6,7 +6,7 @@ use std::{
path::Path,
};

/// Configuration of [AsyncTrainer](crate::AsyncTrainer)
/// Configuration of [`AsyncTrainer`](crate::AsyncTrainer).
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AsyncTrainerConfig {
/// The maximum number of optimization steps.
@@ -56,3 +56,19 @@ impl AsyncTrainerConfig {
Ok(self)
}
}

impl Default for AsyncTrainerConfig {
/// There is no special intention behind these initial values.
fn default() -> Self {
Self {
max_opts: 10, //000,
model_dir: None,
eval_interval: 5000,
flush_record_interval: 5000,
record_compute_cost_interval: 5000,
save_interval: 50000,
sync_interval: 100,
warmup_period: 10000,
}
}
}
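With the new `Default` implementation, a configuration can start from these values and override only selected fields via struct-update syntax. A hedged usage sketch, assuming the fields are public and `AsyncTrainerConfig` is re-exported at the crate root:

```rust
use border_async_trainer::AsyncTrainerConfig;

fn main() {
    // Start from the defaults added above and override only the fields that differ.
    let config = AsyncTrainerConfig {
        max_opts: 100_000,
        sync_interval: 500,
        ..Default::default()
    };
    println!("{:?}", config);
}
```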