diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 00fe9056..fa62b3c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,3 +101,9 @@ jobs: cargo test --example sac_pendulum_tch --features=tch cargo test --example dqn_cartpole --features=candle-core cargo test --example sac_pendulum --features=candle-core + cd border-async-trainer; cargo test; cd .. + cd border-atari-env; cargo test; cd .. + cd border-candle-agent; cargo test; cd .. + cd border-tch-agent; cargo test; cd .. + cd border-policy-no-backend; cargo test --features=border-tch-agent; cd .. + cd border-py-gym-env; cargo test; cd .. diff --git a/CHANGELOG.md b/CHANGELOG.md index abba777b..d912549b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,33 +4,35 @@ ### Added -* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2) -* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1) -* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12) +* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2). +* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1). +* Add `Trainer::train_offline()` method for offline training (`border-core`) (https://github.com/taku-y/border/issues/18). +* Add crate `border-policy-no-backend`. ### Changed -* Take `self` in the signature of `push()` method of replay buffer (`border-core`) -* Fix a bug in `MlpConfig` (`border-tch-agent`) -* Bump the version of tch to 0.16.0 (`border-tch-agent`) -* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`) -* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10) +* Take `self` in the signature of `push()` method of replay buffer (`border-core`). +* Fix a bug in `MlpConfig` (`border-tch-agent`). +* Bump the version of tch to 0.16.0 (`border-tch-agent`). +* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`). +* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10). +* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12). ## v0.0.6 (2023-09-19) ### Added * Docker files (`border`). -* Singularity files (`border`) -* Script for GPUSOROBAN (#67) +* Singularity files (`border`). +* Script for GPUSOROBAN (#67). * `Evaluator` trait in `border-core` (#70). It can be used to customize evaluation logic in `Trainer`. * Example of asynchronous trainer for native Atari environment and DQN (`border/examples`). -* Move tensorboard recorder into a separate crate (`border-tensorboard`) +* Move tensorboard recorder into a separate crate (`border-tensorboard`). ### Changed * Bump the version of tch-rs to 0.8.0 (`border-tch-agent`). * Rename agents as following the convention in Rust (`border-tch-agent`). -* Bump the version of gym to 0.26 (`border-py-gym-env`) -* Remove the type parameter for array shape of gym environments (`border-py-gym-env`) -* Interface of Python-Gym interface (`border-py-gym-env`) +* Bump the version of gym to 0.26 (`border-py-gym-env`). +* Remove the type parameter for array shape of gym environments (`border-py-gym-env`). +* Interface of Python-Gym interface (`border-py-gym-env`). 
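The `Policy`/`Configurable` split listed in the changelog above separates action sampling from agent construction. Below is a minimal sketch of how the two traits combine, modeled on the `RandomPolicy` doctest in `border-atari-env` later in this diff; the blanket `E: Env` impl is illustrative, and `Configurable` is assumed not to be parameterized by the environment, which this diff does not confirm:

```rust
use border_core::{Configurable, Env, Policy};

/// Samples actions uniformly at random from `n_acts` discrete actions.
struct RandomPolicy {
    n_acts: usize,
}

#[derive(Clone)]
struct RandomPolicyConfig {
    n_acts: usize,
}

// Construction from a config lives in the `Configurable` trait.
impl Configurable for RandomPolicy {
    type Config = RandomPolicyConfig;

    fn build(config: Self::Config) -> Self {
        Self {
            n_acts: config.n_acts,
        }
    }
}

// Sampling is all that `Policy` requires after the split.
impl<E: Env> Policy<E> for RandomPolicy
where
    E::Act: From<u8>,
{
    fn sample(&mut self, _obs: &E::Obs) -> E::Act {
        fastrand::u8(..self.n_acts as u8).into()
    }
}
```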
diff --git a/Cargo.toml b/Cargo.toml index 02011f64..551830af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "border-derive", "border-atari-env", "border-async-trainer", + "border-policy-no-backend", "border", ] exclude = ["docker/"] @@ -49,9 +50,10 @@ segment-tree = "2.0.0" image = "0.23.14" candle-core = { version = "=0.4.1", feature = ["cuda", "cudnn"] } candle-nn = "0.4.1" -rand = "0.8.5" +rand = { version = "0.8.5", features = ["small_rng"] } itertools = "0.12.1" ordered-float = "4.2.0" reqwest = { version = "0.11.26", features = ["json", "blocking"] } xxhash-rust = { version = "0.8.10", features = ["xxh3"] } -candle-optimisers = "0.4.0" \ No newline at end of file +candle-optimisers = "0.4.0" +bincode = "1.3.3" diff --git a/border-async-trainer/src/actor/base.rs b/border-async-trainer/src/actor/base.rs index 297a6f4f..95d0fdb3 100644 --- a/border-async-trainer/src/actor/base.rs +++ b/border-async-trainer/src/actor/base.rs @@ -10,7 +10,7 @@ use std::{ }; #[cfg_attr(doc, aquamarine::aquamarine)] -/// Runs interaction between an [`Agent`] and an [`Env`], then generates transitions. +/// Generate transitions by running [`Agent`] in [`Env`]. /// /// ```mermaid /// flowchart TB @@ -23,15 +23,18 @@ use std::{ /// C-->|ReplayBufferBase::PushedItem|F[ReplayBufferProxy] /// ``` /// -/// This diagram shows interaction of [`Agent`], [`Env`] and [`StepProcessor`], -/// as shown in [`border_core::Trainer`]. However, this diagram also shows that +/// In [`Actor`], an [`Agent`] runs on an [`Env`] and generates [`Step`] objects. +/// These objects are processed with [`StepProcessor`] and sent to [`ReplayBufferProxy`]. /// The [`Agent`] in the [`Actor`] periodically synchronizes with the [`Agent`] in -/// [`AsyncTrainer`] via [`SyncModel::ModelInfo`], and the transitions generated by -/// [`StepProcessor`] are sent to the [`ReplayBufferProxy`]. +/// [`AsyncTrainer`] via [`SyncModel::ModelInfo`]. /// /// See also the diagram in [`AsyncTrainer`]. /// /// [`AsyncTrainer`]: crate::AsyncTrainer +/// [`Agent`]: border_core::Agent +/// [`Env`]: border_core::Env +/// [`StepProcessor`]: border_core::StepProcessor +/// [`Step`]: border_core::Step pub struct Actor where A: Agent + Configurable + SyncModel, @@ -70,6 +73,7 @@ where env_seed: i64, stats: Arc>>, ) -> Self { + log::info!("Create actor {}", id); Self { id, stop, diff --git a/border-async-trainer/src/actor/stat.rs b/border-async-trainer/src/actor/stat.rs index bc989ffc..3fb26199 100644 --- a/border-async-trainer/src/actor/stat.rs +++ b/border-async-trainer/src/actor/stat.rs @@ -1,12 +1,12 @@ use std::time::Duration; -/// Stats of sampling process in each [`Actor`](crate::Actor). +/// Stats of sampling process in an [`Actor`](crate::Actor). #[derive(Clone, Debug)] pub struct ActorStat { /// The number of steps for interaction between agent and env. pub env_steps: usize, - /// Duration of sampling loop in [`Actor`](crate::Actor). + /// Duration of sampling loop in the [`Actor`](crate::Actor). pub duration: Duration, } diff --git a/border-async-trainer/src/actor_manager.rs b/border-async-trainer/src/actor_manager.rs index 60304b11..546371ff 100644 --- a/border-async-trainer/src/actor_manager.rs +++ b/border-async-trainer/src/actor_manager.rs @@ -1,4 +1,4 @@ -//! A manager of [Actor]()s. +//! A manager of [`Actor`](crate::Actor)s. 
mod base; mod config; pub use base::ActorManager; diff --git a/border-async-trainer/src/actor_manager/base.rs b/border-async-trainer/src/actor_manager/base.rs index cbfee205..2170921b 100644 --- a/border-async-trainer/src/actor_manager/base.rs +++ b/border-async-trainer/src/actor_manager/base.rs @@ -15,9 +15,11 @@ use std::{ /// Manages [`Actor`]s. /// /// This struct handles the following requests: -/// * From the [LearnerManager]() for updating the latest model info, stored in this struct. +/// * From the [`AsyncTrainer`] for updating the latest model info, stored in this struct. /// * From the [`Actor`]s for getting the latest model info. /// * From the [`Actor`]s for pushing sample batch to the `LearnerManager`. +/// +/// [`AsyncTrainer`]: crate::AsyncTrainer pub struct ActorManager where A: Agent + Configurable + SyncModel, @@ -25,10 +27,10 @@ where E: Env, P: StepProcessor, R: ExperienceBufferBase + ReplayBufferBase, { - /// Configurations of [Agent]s. + /// Configurations of [`Agent`]s. agent_configs: Vec<A::Config>, - /// Configuration of [Env]. + /// Configuration of [`Env`]. env_config: E::Config, /// Configuration of a `StepProcessor`. @@ -77,7 +79,7 @@ where R::Item: Send + 'static, A::ModelInfo: Send + 'static, { - /// Builds a [ActorManager]. + /// Builds an [`ActorManager`]. pub fn build( config: &ActorManagerConfig, agent_configs: &Vec<A::Config>, @@ -103,10 +105,10 @@ } } - /// Runs threads for [Actor]s and a thread for sending samples into the replay buffer. + /// Runs threads for [`Actor`]s and a thread for sending samples into the replay buffer. /// - /// A thread will wait for the initial [SyncModel::ModelInfo] from [AsyncTrainer](crate::AsyncTrainer), - /// which blocks execution of [Actor] threads. + /// Each thread is blocked until receiving the initial [`SyncModel::ModelInfo`] + /// from [`AsyncTrainer`](crate::AsyncTrainer). pub fn run(&mut self, guard_init_env: Arc<Mutex<bool>>) { // Guard for sync of the initial model let guard_init_model = Arc::new(Mutex::new(true)); @@ -220,10 +222,10 @@ where // TODO: error handling, timeout // TODO: caching // TODO: stats - let msg = receiver.recv().unwrap(); - _n_samples += 1; - sender.try_send(msg).unwrap(); - // println!("{:?}", (_msg.id, n_samples)); + if let Ok(msg) = receiver.recv() { + _n_samples += 1; + sender.try_send(msg).unwrap(); + } // Stop the loop if *stop.lock().unwrap() { diff --git a/border-async-trainer/src/async_trainer/base.rs b/border-async-trainer/src/async_trainer/base.rs index 2a16f5de..ffcc001d 100644 --- a/border-async-trainer/src/async_trainer/base.rs +++ b/border-async-trainer/src/async_trainer/base.rs @@ -33,25 +33,26 @@ use std::{ /// end /// ``` /// -/// * In [`ActorManager`] (right), [`Actor`]s sample transitions, which have type -/// [`ReplayBufferBase::Item`], in parallel and push the transitions into -/// [`ReplayBufferProxy`]. It should be noted that [`ReplayBufferProxy`] has a -/// type parameter of [`ReplayBufferBase`] and the proxy accepts -/// [`ReplayBufferBase::Item`]. -/// * The proxy sends the transitions into the replay buffer, implementing -/// [`ReplayBufferBase`], in the [`AsyncTrainer`]. -/// * The [`Agent`] in [`AsyncTrainer`] trains its model parameters by using batches +/// * The [`Agent`] in [`AsyncTrainer`] (left) is trained with batches /// of type [`ReplayBufferBase::Batch`], which are taken from the replay buffer. /// * The model parameters of the [`Agent`] in [`AsyncTrainer`] are wrapped in /// [`SyncModel::ModelInfo`] and periodically sent to the [`Agent`]s in [`Actor`]s.
-/// [`Agent`] must implement [`SyncModel`] to synchronize its model. +/// [`Agent`] must implement [`SyncModel`] to synchronize the model parameters. +/// * In [`ActorManager`] (right), [`Actor`]s sample transitions, which have type +/// [`ReplayBufferBase::Item`], and push the transitions into +/// [`ReplayBufferProxy`]. +/// * [`ReplayBufferProxy`] has a type parameter of [`ReplayBufferBase`] and the proxy accepts +/// [`ReplayBufferBase::Item`]. +/// * The proxy sends the transitions into the replay buffer in the [`AsyncTrainer`]. /// /// [`ActorManager`]: crate::ActorManager /// [`Actor`]: crate::Actor /// [`ReplayBufferBase::Item`]: border_core::ReplayBufferBase::PushedItem +/// [`ReplayBufferBase::Batch`]: border_core::ReplayBufferBase::PushedBatch /// [`ReplayBufferProxy`]: crate::ReplayBufferProxy /// [`ReplayBufferBase`]: border_core::ReplayBufferBase /// [`SyncModel::ModelInfo`]: crate::SyncModel::ModelInfo +/// [`Agent`]: border_core::Agent pub struct AsyncTrainer where A: Agent + Configurable + SyncModel, @@ -266,11 +267,8 @@ }; let mut agent = A::build(self.agent_config.clone()); let mut buffer = R::build(&self.replay_buffer_config); - // let buffer = Arc::new(Mutex::new(R::build(&self.replay_buffer_config))); agent.train(); - // self.run_replay_buffer_thread(buffer.clone()); - let mut max_eval_reward = f32::MIN; let mut opt_steps = 0; let mut samples = 0; @@ -294,7 +292,6 @@ // Add stats wrt computation cost if opt_steps % self.record_compute_cost_interval == 0 { - // record.insert("fps", Scalar(sampler.fps())); record.insert("opt_steps_per_sec", Scalar(self.opt_steps_per_sec())); } diff --git a/border-async-trainer/src/async_trainer/config.rs b/border-async-trainer/src/async_trainer/config.rs index 48498e36..95f5b115 100644 --- a/border-async-trainer/src/async_trainer/config.rs +++ b/border-async-trainer/src/async_trainer/config.rs @@ -6,7 +6,7 @@ use std::{ path::Path, }; -/// Configuration of [AsyncTrainer](crate::AsyncTrainer) +/// Configuration of [`AsyncTrainer`](crate::AsyncTrainer). #[derive(Clone, Debug, Deserialize, Serialize)] pub struct AsyncTrainerConfig { /// The maximum number of optimization steps. @@ -56,3 +56,19 @@ impl AsyncTrainerConfig { Ok(self) } } + +impl Default for AsyncTrainerConfig { + /// There is no special intention behind these initial values. + fn default() -> Self { + Self { + max_opts: 10_000, + model_dir: None, + eval_interval: 5000, + flush_record_interval: 5000, + record_compute_cost_interval: 5000, + save_interval: 50000, + sync_interval: 100, + warmup_period: 10000, + } + } +} diff --git a/border-async-trainer/src/lib.rs b/border-async-trainer/src/lib.rs index d6747291..ce775418 100644 --- a/border-async-trainer/src/lib.rs +++ b/border-async-trainer/src/lib.rs @@ -2,61 +2,126 @@ //! //! The code might look like below. //! -//! ```ignore -//! fn train() { -//! let agent_configs: Vec<_> = vec![agent_config()]; -//! let env_config_train = env_config(name); -//! let env_config_eval = env_config(name).eval(); -//! let replay_buffer_config = load_replay_buffer_config(model_dir.as_str())?; -//! let step_proc_config = SimpleStepProcessorConfig::default(); -//! let actor_man_config = ActorManagerConfig::default(); -//! let async_trainer_config = load_async_trainer_config(model_dir.as_str())?; -//! let mut recorder = TensorboardRecorder::new(model_dir); -//! let mut evaluator = Evaluator::new(&env_config_eval, 0, 1)?; -//! -//! // Shared flag to stop actor threads -//! let stop = Arc::new(Mutex::new(false)); +//! ```
+//! # use serde::{Deserialize, Serialize}; +//! # use border_core::test::{ +//! # TestAgent, TestAgentConfig, TestEnv, TestObs, TestObsBatch, +//! # TestAct, TestActBatch +//! # }; +//! # use border_async_trainer::{ +//! # ActorManager, ActorManagerConfig, AsyncTrainer, AsyncTrainerConfig, +//! # }; +//! # use border_core::{ +//! # generic_replay_buffer::{ +//! # SimpleReplayBuffer, SimpleReplayBufferConfig, +//! # SimpleStepProcessorConfig, SimpleStepProcessor +//! # }, +//! # record::{AggregateRecorder, NullRecorder}, DefaultEvaluator, +//! # }; +//! # +//! # fn agent_config() -> TestAgentConfig { +//! # TestAgentConfig +//! # } +//! # +//! # fn env_config() -> usize { +//! # 0 +//! # } +//! +//! type Env = TestEnv; +//! type ObsBatch = TestObsBatch; +//! type ActBatch = TestActBatch; +//! type ReplayBuffer = SimpleReplayBuffer<ObsBatch, ActBatch>; +//! type StepProcessor = SimpleStepProcessor<Env, ObsBatch, ActBatch>; +//! +//! // Create a new agent by wrapping the existing agent in order to implement SyncModel. +//! struct TestAgent2(TestAgent); +//! +//! impl border_core::Configurable for TestAgent2 { +//! type Config = TestAgentConfig; +//! +//! fn build(config: Self::Config) -> Self { +//! Self(TestAgent::build(config)) +//! } +//! } +//! +//! impl border_core::Agent<Env, ReplayBuffer> for TestAgent2 { +//! // Boilerplate code to delegate the method calls to the inner agent. +//! fn train(&mut self) { +//! self.0.train(); +//! } //! -//! // Creates channels -//! let (item_s, item_r) = unbounded(); // items pushed to replay buffer -//! let (model_s, model_r) = unbounded(); // model_info +//! // For other methods ... +//! # fn is_train(&self) -> bool { +//! # self.0.is_train() +//! # } +//! # +//! # fn eval(&mut self) { +//! # self.0.eval(); +//! # } +//! # +//! # fn opt_with_record(&mut self, buffer: &mut ReplayBuffer) -> border_core::record::Record { +//! # self.0.opt_with_record(buffer) +//! # } +//! # +//! # fn save_params<T: AsRef<std::path::Path>>(&self, path: T) -> anyhow::Result<()> { +//! # self.0.save_params(path) +//! # } +//! # +//! # fn load_params<T: AsRef<std::path::Path>>(&mut self, path: T) -> anyhow::Result<()> { +//! # self.0.load_params(path) +//! # } +//! # +//! # fn opt(&mut self, buffer: &mut ReplayBuffer) { +//! # self.0.opt_with_record(buffer); +//! # } +//! } //! -//! // guard for initialization of envs in multiple threads -//! let guard_init_env = Arc::new(Mutex::new(true)); +//! impl border_core::Policy<Env> for TestAgent2 { +//! // Boilerplate code to delegate the method calls to the inner agent. +//! // ... +//! # fn sample(&mut self, obs: &TestObs) -> TestAct { +//! # self.0.sample(obs) +//! # } +//! } +//! +//! impl border_async_trainer::SyncModel for TestAgent2 { +//! // Self::ModelInfo should include the model parameters. +//! type ModelInfo = usize; +//! //! -//! // Actor manager and async trainer -//! let mut actors = ActorManager::build( -//! &actor_man_config, -//! &agent_configs, -//! &env_config_train, -//! &step_proc_config, -//! item_s, -//! model_r, -//! stop.clone(), -//! ); -//! let mut trainer = AsyncTrainer::build( -//! &async_trainer_config, -//! &agent_config, -//! &env_config_eval, -//! &replay_buffer_config, -//! item_r, -//! model_s, -//! stop.clone(), -//! ); +//! fn model_info(&self) -> (usize, Self::ModelInfo) { +//! // Extracts the model parameters and returns them as Self::ModelInfo. +//! // The first element of the tuple is the number of optimization steps. +//! (0, 0) +//! } //! -//! // Set the number of threads -//! tch::set_num_threads(1); +//!
fn sync_model(&mut self, _model_info: &Self::ModelInfo) { +//! // implements synchronization of the model based on the _model_info +//! } +//! } //! -//! // Starts sampling and training -//! actors.run(guard_init_env.clone()); -//! let stats = trainer.train(&mut recorder, &mut evaluator, guard_init_env); -//! println!("Stats of async trainer"); -//! println!("{}", stats.fmt()); +//! let agent_configs: Vec<_> = vec![agent_config()]; +//! let env_config_train = env_config(); +//! let env_config_eval = env_config(); +//! let replay_buffer_config = SimpleReplayBufferConfig::default(); +//! let step_proc_config = SimpleStepProcessorConfig::default(); +//! let actor_man_config = ActorManagerConfig::default(); +//! let async_trainer_config = AsyncTrainerConfig::default(); +//! let mut recorder: Box = Box::new(NullRecorder {}); +//! let mut evaluator = DefaultEvaluator::::new(&env_config_eval, 0, 1).unwrap(); //! -//! let stats = actors.stop_and_join(); -//! println!("Stats of generated samples in actors"); -//! println!("{}", actor_stats_fmt(&stats)); -//! } +//! border_async_trainer::util::train_async::<_, _, _, StepProcessor>( +//! &agent_config(), +//! &agent_configs, +//! &env_config_train, +//! &env_config_eval, +//! &step_proc_config, +//! &replay_buffer_config, +//! &actor_man_config, +//! &async_trainer_config, +//! &mut recorder, +//! &mut evaluator, +//! ); //! ``` //! //! Training process consists of the following two components: @@ -89,6 +155,7 @@ mod messages; mod replay_buffer_proxy; mod sync_model; pub mod util; + pub use actor::{actor_stats_fmt, Actor, ActorStat}; pub use actor_manager::{ActorManager, ActorManagerConfig}; pub use async_trainer::{AsyncTrainStat, AsyncTrainer, AsyncTrainerConfig}; @@ -96,3 +163,226 @@ pub use error::BorderAsyncTrainerError; pub use messages::PushedItemMessage; pub use replay_buffer_proxy::{ReplayBufferProxy, ReplayBufferProxyConfig}; pub use sync_model::SyncModel; + +/// Agent and Env for testing. +#[cfg(test)] +pub mod test { + use serde::{Deserialize, Serialize}; + + /// Obs for testing. + #[derive(Clone, Debug)] + pub struct TestObs { + obs: usize, + } + + impl border_core::Obs for TestObs { + fn dummy(_n: usize) -> Self { + Self { obs: 0 } + } + + fn len(&self) -> usize { + 1 + } + } + + /// Batch of obs for testing. + pub struct TestObsBatch { + obs: Vec, + } + + impl border_core::generic_replay_buffer::BatchBase for TestObsBatch { + fn new(capacity: usize) -> Self { + Self { + obs: vec![0; capacity], + } + } + + fn push(&mut self, i: usize, data: Self) { + self.obs[i] = data.obs[0]; + } + + fn sample(&self, ixs: &Vec) -> Self { + let obs = ixs.iter().map(|ix| self.obs[*ix]).collect(); + Self { obs } + } + } + + impl From for TestObsBatch { + fn from(obs: TestObs) -> Self { + Self { obs: vec![obs.obs] } + } + } + + /// Act for testing. + #[derive(Clone, Debug)] + pub struct TestAct { + act: usize, + } + + impl border_core::Act for TestAct {} + + /// Batch of act for testing. + pub struct TestActBatch { + act: Vec, + } + + impl From for TestActBatch { + fn from(act: TestAct) -> Self { + Self { act: vec![act.act] } + } + } + + impl border_core::generic_replay_buffer::BatchBase for TestActBatch { + fn new(capacity: usize) -> Self { + Self { + act: vec![0; capacity], + } + } + + fn push(&mut self, i: usize, data: Self) { + self.act[i] = data.act[0]; + } + + fn sample(&self, ixs: &Vec) -> Self { + let act = ixs.iter().map(|ix| self.act[*ix]).collect(); + Self { act } + } + } + + /// Info for testing. 
+ pub struct TestInfo {} + + impl border_core::Info for TestInfo {} + + /// Environment for testing. + pub struct TestEnv { + state_init: usize, + state: usize, + } + + impl border_core::Env for TestEnv { + type Config = usize; + type Obs = TestObs; + type Act = TestAct; + type Info = TestInfo; + + fn reset(&mut self, _is_done: Option<&Vec>) -> anyhow::Result { + self.state = self.state_init; + Ok(TestObs { obs: self.state }) + } + + fn reset_with_index(&mut self, _ix: usize) -> anyhow::Result { + self.state = self.state_init; + Ok(TestObs { obs: self.state }) + } + + fn step_with_reset( + &mut self, + a: &Self::Act, + ) -> (border_core::Step, border_core::record::Record) + where + Self: Sized, + { + self.state = self.state + a.act; + let step = border_core::Step { + obs: TestObs { obs: self.state }, + act: a.clone(), + reward: vec![0.0], + is_terminated: vec![0], + is_truncated: vec![0], + info: TestInfo {}, + init_obs: TestObs { + obs: self.state_init, + }, + }; + return (step, border_core::record::Record::empty()); + } + + fn step(&mut self, a: &Self::Act) -> (border_core::Step, border_core::record::Record) + where + Self: Sized, + { + self.state = self.state + a.act; + let step = border_core::Step { + obs: TestObs { obs: self.state }, + act: a.clone(), + reward: vec![0.0], + is_terminated: vec![0], + is_truncated: vec![0], + info: TestInfo {}, + init_obs: TestObs { + obs: self.state_init, + }, + }; + return (step, border_core::record::Record::empty()); + } + + fn build(config: &Self::Config, _seed: i64) -> anyhow::Result + where + Self: Sized, + { + Ok(Self { + state_init: *config, + state: 0, + }) + } + } + + type ReplayBuffer = + border_core::generic_replay_buffer::SimpleReplayBuffer; + + /// Agent for testing. + pub struct TestAgent {} + + #[derive(Clone, Deserialize, Serialize)] + /// Config of agent for testing. + pub struct TestAgentConfig; + + impl border_core::Agent for TestAgent { + fn train(&mut self) {} + + fn is_train(&self) -> bool { + false + } + + fn eval(&mut self) {} + + fn opt_with_record(&mut self, _buffer: &mut ReplayBuffer) -> border_core::record::Record { + border_core::record::Record::empty() + } + + fn save_params>(&self, _path: T) -> anyhow::Result<()> { + Ok(()) + } + + fn load_params>(&mut self, _path: T) -> anyhow::Result<()> { + Ok(()) + } + } + + impl border_core::Policy for TestAgent { + fn sample(&mut self, _obs: &TestObs) -> TestAct { + TestAct { act: 1 } + } + } + + impl border_core::Configurable for TestAgent { + type Config = TestAgentConfig; + + fn build(_config: Self::Config) -> Self { + Self {} + } + } + + impl crate::SyncModel for TestAgent { + type ModelInfo = usize; + + fn model_info(&self) -> (usize, Self::ModelInfo) { + (0, 0) + } + + fn sync_model(&mut self, _model_info: &Self::ModelInfo) { + // nothing to do + } + } +} diff --git a/border-async-trainer/src/replay_buffer_proxy.rs b/border-async-trainer/src/replay_buffer_proxy.rs index ccd263e0..263c5beb 100644 --- a/border-async-trainer/src/replay_buffer_proxy.rs +++ b/border-async-trainer/src/replay_buffer_proxy.rs @@ -9,7 +9,7 @@ use std::marker::PhantomData; pub struct ReplayBufferProxyConfig { /// Number of samples buffered until sent to the trainer. /// - /// Here, a sample corresponds to a `R::Item` for [`ReplayBufferProxy`]``. + /// A sample is a `R::Item` for [`ReplayBufferProxy`]``. 
pub n_buffer: usize, } diff --git a/border-atari-env/src/act.rs b/border-atari-env/src/act.rs index 9ee50316..6feae6bf 100644 --- a/border-atari-env/src/act.rs +++ b/border-atari-env/src/act.rs @@ -5,7 +5,9 @@ use serde::{Deserialize, Serialize}; use std::{default::Default, marker::PhantomData}; #[derive(Debug, Clone)] -/// Action for [BorderAtariEnv](crate::BorderAtariEnv) +/// Action for [`BorderAtariEnv`](crate::BorderAtariEnv). +/// +/// This action is a discrete action and denotes pushing a button. pub struct BorderAtariAct { pub act: u8, } @@ -28,7 +30,7 @@ impl From for BorderAtariAct { } } -/// Converts `A` to [`BorderAtariAct`]. +/// Converts action of type `A` to [`BorderAtariAct`]. pub trait BorderAtariActFilter { /// Configuration of the filter. type Config: Clone + Default; @@ -56,7 +58,7 @@ impl Default for BorderAtariActRawFilterConfig { } } -/// A filter without any processing. +/// A filter that performs no processing. pub struct BorderAtariActRawFilter { phantom: PhantomData, } diff --git a/border-atari-env/src/atari_env.rs b/border-atari-env/src/atari_env.rs index ec4d0758..b2731cd2 100644 --- a/border-atari-env/src/atari_env.rs +++ b/border-atari-env/src/atari_env.rs @@ -1,3 +1,4 @@ +//! Atari environment for reinforcement learning. pub mod ale; use std::path::Path; diff --git a/border-atari-env/src/env/config.rs b/border-atari-env/src/env/config.rs index 1f410644..9abb742b 100644 --- a/border-atari-env/src/env/config.rs +++ b/border-atari-env/src/env/config.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use std::{default::Default, env}; #[derive(Serialize, Deserialize, Debug)] -/// Configurations of [`BorderAtariEnv`](super::BorderAtariEnv). +/// Configuration of [`BorderAtariEnv`](super::BorderAtariEnv). pub struct BorderAtariEnvConfig where O: Obs, diff --git a/border-atari-env/src/lib.rs b/border-atari-env/src/lib.rs index 0f2d8277..7d47560e 100644 --- a/border-atari-env/src/lib.rs +++ b/border-atari-env/src/lib.rs @@ -1,13 +1,13 @@ -//! A thin wrapper of [atari-env](https://crates.io/crates/atari-env) for [Border](https://crates.io/crates/border). +//! A thin wrapper of [`atari-env`](https://crates.io/crates/atari-env) for [`Border`](https://crates.io/crates/border). //! //! The code under [atari_env] is adapted from the -//! [atari-env](https://crates.io/crates/atari-env) crate +//! [`atari-env`](https://crates.io/crates/atari-env) crate //! (rev = `0ef0422f953d79e96b32ad14284c9600bd34f335`), //! because the crate registered in crates.io does not implement //! [`atari_env::AtariEnv::lives()`] method, which is required for episodic life environments. //! //! This environment applies some preprocessing to observation as in -//! [atari_wrapper.py](https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py). +//! [`atari_wrapper.py`](https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py). //! //! You need to place Atari Rom directories under the directory specified by environment variable //! `ATARI_ROM_DIR`. An easy way to do this is to use [AutoROM](https://pypi.org/project/AutoROM/) @@ -28,55 +28,50 @@ //! BorderAtariAct, BorderAtariActRawFilter, BorderAtariEnv, BorderAtariEnvConfig, //! BorderAtariObs, BorderAtariObsRawFilter, //! }; -//! use border_core::{util, Env as _, Policy, DefaultEvaluator, Evaluator as _}; -//! -//! type Obs = BorderAtariObs; -//! type Act = BorderAtariAct; -//! type ObsFilter = BorderAtariObsRawFilter; -//! type ActFilter = BorderAtariActRawFilter; -//! 
type EnvConfig = BorderAtariEnvConfig; -//! type Env = BorderAtariEnv; -//! -//! #[derive(Clone)] -//! struct RandomPolicyConfig { -//! pub n_acts: usize, -//! } -//! -//! struct RandomPolicy { -//! n_acts: usize, -//! } -//! -//! impl Policy for RandomPolicy { -//! type Config = RandomPolicyConfig; -//! -//! fn build(config: Self::Config) -> Self { -//! Self { -//! n_acts: config.n_acts, -//! } -//! } -//! -//! fn sample(&mut self, _: &Obs) -> Act { -//! fastrand::u8(..self.n_acts as u8).into() -//! } -//! } -//! -//! fn env_config(name: String) -> EnvConfig { -//! EnvConfig::default().name(name) -//! } -//! +//! use border_core::{Env as _, Policy, DefaultEvaluator, Evaluator as _}; +//! +//! # type Obs = BorderAtariObs; +//! # type Act = BorderAtariAct; +//! # type ObsFilter = BorderAtariObsRawFilter; +//! # type ActFilter = BorderAtariActRawFilter; +//! # type EnvConfig = BorderAtariEnvConfig; +//! # type Env = BorderAtariEnv; +//! # +//! # #[derive(Clone)] +//! # struct RandomPolicyConfig { +//! # pub n_acts: usize, +//! # } +//! # +//! # struct RandomPolicy { +//! # n_acts: usize, +//! # } +//! # +//! # impl RandomPolicy { +//! # pub fn build(n_acts: usize) -> Self { +//! # Self { n_acts } +//! # } +//! # } +//! # +//! # impl Policy for RandomPolicy { +//! # fn sample(&mut self, _: &Obs) -> Act { +//! # fastrand::u8(..self.n_acts as u8).into() +//! # } +//! # } +//! # +//! # fn env_config(name: String) -> EnvConfig { +//! # EnvConfig::default().name(name) +//! # } +//! # //! fn main() -> Result<()> { -//! env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); -//! fastrand::seed(42); -//! +//! # env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); +//! # fastrand::seed(42); +//! # //! // Creates Pong environment //! let env_config = env_config("pong".to_string()); //! //! // Creates a random policy -//! let n_acts = 4; // number of actions; -//! let policy_config = RandomPolicyConfig { -//! n_acts: n_acts as _, -//! }; -//! let mut policy = RandomPolicy::build(policy_config); +//! let n_acts = 4; +//! let mut policy = RandomPolicy::build(n_acts); //! //! // Runs evaluation //! let env_config = env_config.render(true); diff --git a/border-atari-env/src/obs.rs b/border-atari-env/src/obs.rs index f37e4ab0..2a5e4b59 100644 --- a/border-atari-env/src/obs.rs +++ b/border-atari-env/src/obs.rs @@ -67,7 +67,7 @@ impl From for Tensor { } } -/// Converts [`BorderAtariObs`] to `O` with an arbitrary processing. +/// Converts [`BorderAtariObs`] to observation of type `O` with an arbitrary processing. pub trait BorderAtariObsFilter { /// Configuration of the filter. type Config: Clone + Default; @@ -98,7 +98,7 @@ impl Default for BorderAtariObsRawFilterConfig { } } -/// A filter without any processing. +/// A filter that performs no processing. pub struct BorderAtariObsRawFilter { phantom: PhantomData, } diff --git a/border-atari-env/src/util.rs b/border-atari-env/src/util.rs index 7b788c20..8788d89b 100644 --- a/border-atari-env/src/util.rs +++ b/border-atari-env/src/util.rs @@ -1 +1,2 @@ +//! Utility functions for testing. pub mod test; diff --git a/border-candle-agent/src/dqn/base.rs b/border-candle-agent/src/dqn/base.rs index a7f2d7fb..f67055b0 100644 --- a/border-candle-agent/src/dqn/base.rs +++ b/border-candle-agent/src/dqn/base.rs @@ -1,4 +1,4 @@ -//! DQN agent implemented with tch-rs. +//! DQN agent implemented with candle. 
use super::{config::DqnConfig, explorer::DqnExplorer, model::DqnModel}; use crate::{ model::SubModel1, @@ -17,7 +17,7 @@ use std::convert::TryFrom; use std::{fs, marker::PhantomData, path::Path}; #[allow(clippy::upper_case_acronyms, dead_code)] -/// DQN agent implemented with tch-rs. +/// DQN agent implemented with candle. pub struct Dqn where Q: SubModel1, @@ -330,6 +330,10 @@ where record } + /// Save model parameters in the given directory. + /// + /// The parameters of the model are saved as `qnet.pt`. + /// The parameters of the target model are saved as `qnet_tgt.pt`. fn save_params>(&self, path: T) -> Result<()> { // TODO: consider to rename the path if it already exists fs::create_dir_all(&path)?; diff --git a/border-candle-agent/src/dqn/model.rs b/border-candle-agent/src/dqn/model.rs index c14f888d..90efee8b 100644 --- a/border-candle-agent/src/dqn/model.rs +++ b/border-candle-agent/src/dqn/model.rs @@ -79,6 +79,12 @@ where } } +/// Action value function model for DQN. +/// +/// The architecture of the model is defined by the type parameter `Q`, +/// which should implement [`SubModel1`]. +/// This takes [`SubModel1::Input`] as input and outputs a tensor. +/// The output tensor should have the same dimension as the number of actions. pub struct DqnModel where Q: SubModel1, diff --git a/border-candle-agent/src/lib.rs b/border-candle-agent/src/lib.rs index 2ae29440..9809a488 100644 --- a/border-candle-agent/src/lib.rs +++ b/border-candle-agent/src/lib.rs @@ -16,6 +16,8 @@ pub use tensor_batch::{TensorBatch, ZeroTensor}; /// Device for using candle. /// /// This enum is added because [`candle_core::Device`] does not support serialization. +/// +/// [`candle_core::Device`]: https://docs.rs/candle-core/0.4.1/candle_core/enum.Device.html pub enum Device { /// The main CPU device. Cpu, diff --git a/border-candle-agent/src/model.rs b/border-candle-agent/src/model.rs index 43f245c4..6fbacc6a 100644 --- a/border-candle-agent/src/model.rs +++ b/border-candle-agent/src/model.rs @@ -7,7 +7,7 @@ use candle_nn::VarBuilder; /// Neural network model not owing its [`VarMap`] internally. /// -/// [`VarMap`]: candle_nn::VarMap +/// [`VarMap`]: https://docs.rs/candle-nn/0.4.1/candle_nn/var_map/struct.VarMap.html pub trait SubModel1 { /// Configuration from which [`SubModel1`] is constructed. type Config; @@ -19,6 +19,8 @@ pub trait SubModel1 { type Output; /// Builds [`SubModel1`] with [`VarBuilder`] and [`SubModel1::Config`]. + /// + /// [`VarBuilder`]: https://docs.rs/candle-nn/0.4.1/candle_nn/var_builder/type.VarBuilder.html fn build(vb: VarBuilder, config: Self::Config) -> Self; /// A generalized forward function. @@ -29,7 +31,7 @@ pub trait SubModel1 { /// /// The difference from [`SubModel1`] is that this trait takes two inputs. /// -/// [`VarMap`]: candle_nn::VarMap +/// [`VarMap`]: https://docs.rs/candle-nn/0.4.1/candle_nn/var_map/struct.VarMap.html pub trait SubModel2 { /// Configuration from which [`SubModel2`] is constructed. type Config; diff --git a/border-candle-agent/src/opt.rs b/border-candle-agent/src/opt.rs index 48fe7cd4..0dff9522 100644 --- a/border-candle-agent/src/opt.rs +++ b/border-candle-agent/src/opt.rs @@ -113,6 +113,8 @@ impl Default for OptimizerConfig { /// Optimizers. /// /// This is a thin wrapper of [`candle_nn::optim::Optimizer`]. +/// +/// [`candle_nn::optim::Optimizer`]: https://docs.rs/candle-nn/0.4.1/candle_nn/optim/trait.Optimizer.html pub enum Optimizer { /// Adam optimizer. 
AdamW(AdamW), diff --git a/border-candle-agent/src/sac.rs b/border-candle-agent/src/sac.rs index bd2b31ea..89164dfd 100644 --- a/border-candle-agent/src/sac.rs +++ b/border-candle-agent/src/sac.rs @@ -1,10 +1,156 @@ //! SAC agent. //! -//! Here is an example in `border/examples/sac_pendulum.rs` +//! Here is an example of creating SAC agent: //! -//! ```rust,ignore +//! ```no_run +//! # use anyhow::Result; +//! use border_core::{ +//! # Env as Env_, Obs as Obs_, Act as Act_, Step, test::{ +//! # TestAct as TestAct_, TestActBatch as TestActBatch_, +//! # TestEnv as TestEnv_, +//! # TestObs as TestObs_, TestObsBatch as TestObsBatch_, +//! # }, +//! # record::Record, +//! # generic_replay_buffer::{SimpleReplayBuffer, BatchBase}, +//! Configurable, +//! }; +//! use border_candle_agent::{ +//! sac::{ActorConfig, CriticConfig, Sac, SacConfig}, +//! mlp::{Mlp, Mlp2, MlpConfig}, +//! opt::OptimizerConfig +//! }; +//! +//! # struct TestEnv(TestEnv_); +//! # #[derive(Clone, Debug)] +//! # struct TestObs(TestObs_); +//! # #[derive(Clone, Debug)] +//! # struct TestAct(TestAct_); +//! # struct TestObsBatch(TestObsBatch_); +//! # struct TestActBatch(TestActBatch_); +//! # +//! # impl Obs_ for TestObs { +//! # fn dummy(n: usize) -> Self { +//! # Self(TestObs_::dummy(n)) +//! # } +//! # +//! # fn len(&self) -> usize { +//! # self.0.len() +//! # } +//! # } +//! # +//! # impl Into for TestObs { +//! # fn into(self) -> candle_core::Tensor { +//! # unimplemented!(); +//! # } +//! # } +//! # +//! # impl BatchBase for TestObsBatch { +//! # fn new(n: usize) -> Self { +//! # Self(TestObsBatch_::new(n)) +//! # } +//! # +//! # fn push(&mut self, ix: usize, data: Self) { +//! # self.0.push(ix, data.0); +//! # } +//! # +//! # fn sample(&self, ixs: &Vec) -> Self { +//! # Self(self.0.sample(ixs)) +//! # } +//! # } +//! # +//! # impl BatchBase for TestActBatch { +//! # fn new(n: usize) -> Self { +//! # Self(TestActBatch_::new(n)) +//! # } +//! # +//! # fn push(&mut self, ix: usize, data: Self) { +//! # self.0.push(ix, data.0); +//! # } +//! # +//! # fn sample(&self, ixs: &Vec) -> Self { +//! # Self(self.0.sample(ixs)) +//! # } +//! # } +//! # +//! # impl Act_ for TestAct { +//! # fn len(&self) -> usize { +//! # self.0.len() +//! # } +//! # } +//! # +//! # impl From for TestAct { +//! # fn from(t: candle_core::Tensor) -> Self { +//! # unimplemented!(); +//! # } +//! # } +//! # +//! # impl Into for TestAct { +//! # fn into(self) -> candle_core::Tensor { +//! # unimplemented!(); +//! # } +//! # } +//! # +//! # impl Env_ for TestEnv { +//! # type Config = ::Config; +//! # type Obs = TestObs; +//! # type Act = TestAct; +//! # type Info = ::Info; +//! # +//! # fn build(config: &Self::Config, seed: i64) -> Result { +//! # Ok(Self(TestEnv_::build(&config, seed).unwrap())) +//! # } +//! # +//! # fn step(&mut self, act: &TestAct) -> (Step, Record) { +//! # let (step, record) = self.0.step(&act.0); +//! # let step = Step { +//! # obs: TestObs(step.obs), +//! # act: TestAct(step.act), +//! # reward: step.reward, +//! # is_terminated: step.is_terminated, +//! # is_truncated: step.is_truncated, +//! # info: step.info, +//! # init_obs: TestObs(step.init_obs), +//! # }; +//! # (step, record) +//! # } +//! # +//! # fn reset(&mut self, is_done: Option<&Vec>) -> Result { +//! # Ok(TestObs(self.0.reset(is_done).unwrap())) +//! # } +//! # +//! # fn step_with_reset(&mut self, a: &TestAct) -> (Step, Record) { +//! # let (step, record) = self.0.step_with_reset(&a.0); +//! # let step = Step { +//! # obs: TestObs(step.obs), +//! 
# act: TestAct(step.act), +//! # reward: step.reward, +//! # is_terminated: step.is_terminated, +//! # is_truncated: step.is_truncated, +//! # info: step.info, +//! # init_obs: TestObs(step.init_obs), +//! # }; +//! # (step, record) +//! # } +//! # +//! # fn reset_with_index(&mut self, ix: usize) -> Result { +//! # Ok(TestObs(self.0.reset_with_index(ix).unwrap())) +//! # } +//! # } +//! # +//! # type Env = TestEnv; +//! # type ObsBatch = TestObsBatch; +//! # type ActBatch = TestActBatch; +//! # type ReplayBuffer = SimpleReplayBuffer; +//! # +//! const DIM_OBS: i64 = 3; +//! const DIM_ACT: i64 = 1; +//! const LR_ACTOR: f64 = 1e-3; +//! const LR_CRITIC: f64 = 1e-3; +//! const BATCH_SIZE: usize = 256; +//! //! fn create_agent(in_dim: i64, out_dim: i64) -> Sac { -//! let device = tch::Device::cuda_if_available(); +//! let device = candle_core::Device::cuda_if_available(0).unwrap(); +//! //! let actor_config = ActorConfig::default() //! .opt_config(OptimizerConfig::Adam { lr: LR_ACTOR }) //! .out_dim(out_dim) @@ -12,25 +158,13 @@ //! let critic_config = CriticConfig::default() //! .opt_config(OptimizerConfig::Adam { lr: LR_CRITIC }) //! .q_config(MlpConfig::new(in_dim + out_dim, vec![64, 64], 1, true)); -//! let sac_config = SacConfig::default() +//! let sac_config = SacConfig::::default() //! .batch_size(BATCH_SIZE) -//! .min_transitions_warmup(N_TRANSITIONS_WARMUP) //! .actor_config(actor_config) //! .critic_config(critic_config) //! .device(device); //! Sac::build(sac_config) //! } -//! -//! fn train(max_opts: usize, model_dir: &str, eval_interval: usize) -> Result<()> { -//! let trainer = //... -//! let mut agent = create_agent(DIM_OBS, DIM_ACT); -//! let mut recorder = TensorboardRecorder::new(model_dir); -//! let mut evaluator = Evaluator::new(&env_config(), 0, N_EPISODES_PER_EVAL)?; -//! -//! trainer.train(&mut agent, &mut recorder, &mut evaluator)?; -//! -//! Ok(()) -//! } //! ``` mod actor; mod base; diff --git a/border-candle-agent/src/sac/config.rs b/border-candle-agent/src/sac/config.rs index b789090f..3256740f 100644 --- a/border-candle-agent/src/sac/config.rs +++ b/border-candle-agent/src/sac/config.rs @@ -18,7 +18,7 @@ use std::{ path::Path, }; -/// Constructs [`Sac`](super::Sac). +/// Configuration of [`Sac`](super::Sac). #[allow(clippy::upper_case_acronyms)] #[derive(Debug, Deserialize, Serialize, PartialEq)] pub struct SacConfig diff --git a/border-candle-agent/src/tensor_batch.rs b/border-candle-agent/src/tensor_batch.rs index 21c031bb..410ac023 100644 --- a/border-candle-agent/src/tensor_batch.rs +++ b/border-candle-agent/src/tensor_batch.rs @@ -1,7 +1,9 @@ use border_core::generic_replay_buffer::BatchBase; use candle_core::{error::Result, DType, Device, Tensor}; -/// Adds capability of constructing [Tensor] with a static method. +/// Adds capability of constructing [`Tensor`] with a static method. +/// +/// [`Tensor`]: https://docs.rs/candle-core/0.4.1/candle_core/struct.Tensor.html pub trait ZeroTensor { /// Constructs zero tensor. fn zeros(shape: &[usize]) -> Result; @@ -28,6 +30,8 @@ impl ZeroTensor for i64 { /// A buffer consisting of a [`Tensor`]. /// /// The internal buffer is `Vec`. 
+/// +/// [`Tensor`]: https://docs.rs/candle-core/0.4.1/candle_core/struct.Tensor.html #[derive(Clone, Debug)] pub struct TensorBatch { buf: Vec, diff --git a/border-candle-agent/src/util.rs b/border-candle-agent/src/util.rs index dad828f6..1e967546 100644 --- a/border-candle-agent/src/util.rs +++ b/border-candle-agent/src/util.rs @@ -23,27 +23,10 @@ pub enum CriticLoss { SmoothL1, } -// /// Apply soft update on a model. -// /// -// /// Variables are identified by their names. -// pub fn track(dest: &mut M, src: &mut M, tau: f64) { -// let src = &mut src.get_var_store().variables(); -// let dest = &mut dest.get_var_store().variables(); -// debug_assert_eq!(src.len(), dest.len()); - -// let names = src.keys(); -// tch::no_grad(|| { -// for name in names { -// let src = src.get(name).unwrap(); -// let dest = dest.get_mut(name).unwrap(); -// dest.copy_(&(tau * src + (1.0 - tau) * &*dest)); -// } -// }); -// trace!("soft update"); -// } - -/// Apply soft update on model parameters. +/// Apply soft update on variables. /// +/// Variables are identified by their names. +/// /// dest = tau * src + (1.0 - tau) * dest pub fn track(dest: &VarMap, src: &VarMap, tau: f64) -> Result<()> { trace!("dest"); @@ -69,6 +52,7 @@ pub fn track(dest: &VarMap, src: &VarMap, tau: f64) -> Result<()> { // v // } +/// Interface for handling output dimensions. pub trait OutDim { /// Returns the output dimension. fn get_out_dim(&self) -> i64; @@ -141,6 +125,7 @@ pub fn smooth_l1_loss(x: &Tensor, y: &Tensor) -> Result f32 { t.broadcast_sub(&t.mean_all().unwrap()) .unwrap() @@ -154,6 +139,7 @@ pub fn std(t: &Tensor) -> f32 { .unwrap() } +/// Returns the mean and standard deviation of the parameters. pub fn param_stats(varmap: &VarMap) -> Record { let mut record = Record::empty(); diff --git a/border-core/src/lib.rs b/border-core/src/lib.rs index c14d4a8a..0f8efd09 100644 --- a/border-core/src/lib.rs +++ b/border-core/src/lib.rs @@ -4,26 +4,25 @@ //! # Observation and action //! //! [`Obs`] and [`Act`] traits are abstractions of observation and action in environments. -//! These traits can handle two or more samples for implementing vectorized environments. +//! These traits can handle two or more samples for implementing vectorized environments, +//! although there is currently no implementation of a vectorized environment. //! //! # Environment //! //! [`Env`] trait is an abstraction of environments. It has four associated types: //! `Config`, `Obs`, `Act` and `Info`. `Obs` and `Act` are concrete types of //! observation and action of the environment. -//! These must implement [`Obs`] and [`Act`] traits, respectively. +//! These types must implement [`Obs`] and [`Act`] traits, respectively. //! The environment that implements [`Env`] generates [`Step`] object //! at every environment interaction step with [`Env::step()`] method. -//! -//! `Info` stores some information at every step of interactions of an agent and +//! [`Info`] stores some information at every interaction step between an agent and //! the environment. It could be empty (zero-sized struct). `Config` represents //! configurations of the environment and is used to build. //! //! # Policy //! -//! [`Policy`] represents a policy, from which actions are sampled for -//! environment `E`. [`Policy::sample()`] takes `E::Obs` and emits `E::Act`. -//! It could be probabilistic or deterministic. +//! [`Policy`] represents a policy. [`Policy::sample()`] takes `E::Obs` and +//! generates `E::Act`. It could be probabilistic or deterministic. //! //! # Agent //!
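To make the `Obs` and `Act` abstractions above concrete, here is a minimal sketch mirroring the `TestObs`/`TestAct` types that this patch adds in `border_core::test`; the scalar payload is illustrative:

```rust
use border_core::{Act, Obs};

/// A scalar observation holding a single sample (not vectorized).
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct MyObs {
    value: f32,
}

impl Obs for MyObs {
    /// A placeholder observation, e.g. used before the first reset.
    fn dummy(_n: usize) -> Self {
        Self { value: 0.0 }
    }

    /// The number of samples held in this observation.
    fn len(&self) -> usize {
        1
    }
}

/// A scalar action; as with `TestAct` in the new test module, no methods
/// beyond the `Act` defaults are required.
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct MyAct {
    value: f32,
}

impl Act for MyAct {}
```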
@@ -32,34 +31,36 @@ //! the agent's policy might be probabilistic for exploration, while in evaluation mode, //! the policy might be deterministic. //! -//! [`Agent::opt()`] method does a single optimization step. The definition of an -//! optimization step depends on each agent. It might be multiple stochastic gradient +//! The [`Agent::opt()`] method performs a single optimization step. The definition of an +//! optimization step varies for each agent. It might be multiple stochastic gradient //! steps in an optimization step. Samples for training are taken from //! [`R: ReplayBufferBase`][`ReplayBufferBase`]. //! -//! This trait also has methods for saving/loading the trained policy -//! in the given directory. +//! This trait also has methods for saving/loading parameters of the trained policy +//! in a directory. //! //! # Batch //! //! [`TransitionBatch`] is a trait of a batch of transitions `(o_t, r_t, a_t, o_t+1)`. -//! This is used to train [`Agent`]s with an RL algorithm. +//! This trait is used to train [`Agent`]s using an RL algorithm. //! -//! # Replay buffer +//! # Replay buffer and experience buffer //! -//! [`ReplayBufferBase`] trait is an abstraction of replay buffers. For handling samples, -//! there are two associated types: `Item` and `Batch`. `Item` is a type -//! representing samples pushed to the buffer. These samples might be generated from -//! [`Step`]. [`StepProcessor`] trait provides the interface -//! for converting [`Step`] into `Item`. +//! The [`ReplayBufferBase`] trait is an abstraction of replay buffers. +//! Its associated type [`ReplayBufferBase::Batch`] represents samples taken from +//! the buffer for training [`Agent`]s. Agents must implement the [`Agent::opt()`] method, +//! in which [`ReplayBufferBase::Batch`] has appropriate trait bound(s) for training +//! the agent. //! -//! `Batch` is a type of samples taken from the buffer for training [`Agent`]s. -//! The user implements [`Agent::opt()`] method such that it handles `Batch` objects -//! for doing an optimization step. +//! As explained above, the [`ReplayBufferBase`] trait is able to generate batches +//! of samples with which agents are trained. On the other hand, the [`ExperienceBufferBase`] +//! trait is able to store samples. [`ExperienceBufferBase::push()`] is used to push +//! samples of type [`ExperienceBufferBase::Item`], which might be obtained via interaction +//! steps with an environment. //! //! ## A reference implementation //! -//! [`SimpleReplayBuffer`] implementats [`ReplayBufferBase`]. +//! [`SimpleReplayBuffer`] implements both [`ReplayBufferBase`] and [`ExperienceBufferBase`]. //! This type has two parameters `O` and `A`, which are representation of //! observation and action in the replay buffer. `O` and `A` must implement //! [`BatchBase`], which has the functionality of storing samples, like `Vec`, @@ -74,10 +75,12 @@ //! # Trainer //! //! [`Trainer`] manages training loop and related objects. The [`Trainer`] object is -//! built with configurations of [`Env`], [`ReplayBufferBase`], [`StepProcessor`] -//! and some training parameters. Then, [`Trainer::train`] method starts training loop with -//! given [`Agent`] and [`Recorder`](crate::record::Recorder). -//! +//! built with configurations of training parameters such as the maximum number of +//! optimization steps and the model directory in which parameters of the agent are saved +//! during training. +//! The [`Trainer::train`] method executes online training of an agent in an environment.
+//! In the training loop of this method, the agent interacts with the environment to +//! take samples and perform optimization steps. Some metrics are recorded at the same time. +//! //! [`SimpleReplayBuffer`]: replay_buffer::SimpleReplayBuffer //! [`SimpleReplayBuffer`]: generic_replay_buffer::SimpleReplayBuffer //! [`BatchBase`]: generic_replay_buffer::BatchBase @@ -98,3 +101,214 @@ pub use base::{ mod trainer; pub use evaluator::{DefaultEvaluator, Evaluator}; pub use trainer::{Sampler, Trainer, TrainerConfig}; + +// TODO: Consider compiling this module only for tests. +/// Agent and Env for testing. +pub mod test { + use serde::{Deserialize, Serialize}; + + /// Obs for testing. + #[derive(Clone, Debug)] + pub struct TestObs { + obs: usize, + } + + impl crate::Obs for TestObs { + fn dummy(_n: usize) -> Self { + Self { obs: 0 } + } + + fn len(&self) -> usize { + 1 + } + } + + /// Batch of obs for testing. + pub struct TestObsBatch { + obs: Vec<usize>, + } + + impl crate::generic_replay_buffer::BatchBase for TestObsBatch { + fn new(capacity: usize) -> Self { + Self { + obs: vec![0; capacity], + } + } + + fn push(&mut self, i: usize, data: Self) { + self.obs[i] = data.obs[0]; + } + + fn sample(&self, ixs: &Vec<usize>) -> Self { + let obs = ixs.iter().map(|ix| self.obs[*ix]).collect(); + Self { obs } + } + } + + impl From<TestObs> for TestObsBatch { + fn from(obs: TestObs) -> Self { + Self { obs: vec![obs.obs] } + } + } + + /// Act for testing. + #[derive(Clone, Debug)] + pub struct TestAct { + act: usize, + } + + impl crate::Act for TestAct {} + + /// Batch of act for testing. + pub struct TestActBatch { + act: Vec<usize>, + } + + impl From<TestAct> for TestActBatch { + fn from(act: TestAct) -> Self { + Self { act: vec![act.act] } + } + } + + impl crate::generic_replay_buffer::BatchBase for TestActBatch { + fn new(capacity: usize) -> Self { + Self { + act: vec![0; capacity], + } + } + + fn push(&mut self, i: usize, data: Self) { + self.act[i] = data.act[0]; + } + + fn sample(&self, ixs: &Vec<usize>) -> Self { + let act = ixs.iter().map(|ix| self.act[*ix]).collect(); + Self { act } + } + } + + /// Info for testing. + pub struct TestInfo {} + + impl crate::Info for TestInfo {} + + /// Environment for testing.
+ pub struct TestEnv { + state_init: usize, + state: usize, + } + + impl crate::Env for TestEnv { + type Config = usize; + type Obs = TestObs; + type Act = TestAct; + type Info = TestInfo; + + fn reset(&mut self, _is_done: Option<&Vec>) -> anyhow::Result { + self.state = self.state_init; + Ok(TestObs { obs: self.state }) + } + + fn reset_with_index(&mut self, _ix: usize) -> anyhow::Result { + self.state = self.state_init; + Ok(TestObs { obs: self.state }) + } + + fn step_with_reset( + &mut self, + a: &Self::Act, + ) -> (crate::Step, crate::record::Record) + where + Self: Sized, + { + self.state = self.state + a.act; + let step = crate::Step { + obs: TestObs { obs: self.state }, + act: a.clone(), + reward: vec![0.0], + is_terminated: vec![0], + is_truncated: vec![0], + info: TestInfo {}, + init_obs: TestObs { + obs: self.state_init, + }, + }; + return (step, crate::record::Record::empty()); + } + + fn step(&mut self, a: &Self::Act) -> (crate::Step, crate::record::Record) + where + Self: Sized, + { + self.state = self.state + a.act; + let step = crate::Step { + obs: TestObs { obs: self.state }, + act: a.clone(), + reward: vec![0.0], + is_terminated: vec![0], + is_truncated: vec![0], + info: TestInfo {}, + init_obs: TestObs { + obs: self.state_init, + }, + }; + return (step, crate::record::Record::empty()); + } + + fn build(config: &Self::Config, _seed: i64) -> anyhow::Result + where + Self: Sized, + { + Ok(Self { + state_init: *config, + state: 0, + }) + } + } + + type ReplayBuffer = + crate::generic_replay_buffer::SimpleReplayBuffer; + + /// Agent for testing. + pub struct TestAgent {} + + #[derive(Clone, Deserialize, Serialize)] + /// Config of agent for testing. + pub struct TestAgentConfig; + + impl crate::Agent for TestAgent { + fn train(&mut self) {} + + fn is_train(&self) -> bool { + false + } + + fn eval(&mut self) {} + + fn opt_with_record(&mut self, _buffer: &mut ReplayBuffer) -> crate::record::Record { + crate::record::Record::empty() + } + + fn save_params>(&self, _path: T) -> anyhow::Result<()> { + Ok(()) + } + + fn load_params>(&mut self, _path: T) -> anyhow::Result<()> { + Ok(()) + } + } + + impl crate::Policy for TestAgent { + fn sample(&mut self, _obs: &TestObs) -> TestAct { + TestAct { act: 1 } + } + } + + impl crate::Configurable for TestAgent { + type Config = TestAgentConfig; + + fn build(_config: Self::Config) -> Self { + Self {} + } + } +} diff --git a/border-derive/Cargo.toml b/border-derive/Cargo.toml index 53ba7fff..70a4a40f 100644 --- a/border-derive/Cargo.toml +++ b/border-derive/Cargo.toml @@ -25,16 +25,10 @@ border-tch-agent = { version = "0.0.7", path = "../border-tch-agent" } border-candle-agent = { version = "0.0.7", path = "../border-candle-agent" } border-py-gym-env = { version = "0.0.7", path = "../border-py-gym-env" } border-core = { version = "0.0.7", path = "../border-core" } +border-atari-env = { version = "0.0.7", path = "../border-atari-env" } ndarray = { workspace = true } tch = { workspace = true } candle-core = { workspace = true } -# [features] -# default = ["tch"] - -[[example]] -name = "test1" -required-features = ["tch"] - [package.metadata.docs.rs] features = ["doc-only"] diff --git a/border-derive/examples/border_atari_act.rs b/border-derive/examples/border_atari_act.rs new file mode 100644 index 00000000..4f7b002a --- /dev/null +++ b/border-derive/examples/border_atari_act.rs @@ -0,0 +1,8 @@ +use border_atari_env::BorderAtariAct; +use border_derive::Act; + +#[allow(dead_code)] +#[derive(Clone, Debug, Act)] +struct MyAct(BorderAtariAct); + 
+fn main() {} diff --git a/border-derive/examples/border_gym_cont_act.rs b/border-derive/examples/border_gym_cont_act.rs new file mode 100644 index 00000000..9015aca0 --- /dev/null +++ b/border-derive/examples/border_gym_cont_act.rs @@ -0,0 +1,8 @@ +use border_derive::Act; +use border_py_gym_env::GymContinuousAct; + +#[allow(dead_code)] +#[derive(Clone, Debug, Act)] +struct MyAct(GymContinuousAct); + +fn main() {} diff --git a/border-derive/examples/border_gym_disc_act.rs b/border-derive/examples/border_gym_disc_act.rs new file mode 100644 index 00000000..05d0ea07 --- /dev/null +++ b/border-derive/examples/border_gym_disc_act.rs @@ -0,0 +1,8 @@ +use border_derive::Act; +use border_py_gym_env::GymDiscreteAct; + +#[allow(dead_code)] +#[derive(Clone, Debug, Act)] +struct MyAct(GymDiscreteAct); + +fn main() {} diff --git a/border-derive/examples/border_tensor_batch.rs b/border-derive/examples/border_tensor_batch.rs new file mode 100644 index 00000000..697e7b32 --- /dev/null +++ b/border-derive/examples/border_tensor_batch.rs @@ -0,0 +1,8 @@ +use border_derive::BatchBase; +use border_tch_agent::TensorBatch; + +#[allow(dead_code)] +#[derive(Clone, BatchBase)] +pub struct ObsBatch(TensorBatch); + +fn main() {} diff --git a/border-derive/examples/test1.rs b/border-derive/examples/test1.rs deleted file mode 100644 index 3551ff5a..00000000 --- a/border-derive/examples/test1.rs +++ /dev/null @@ -1,40 +0,0 @@ -use border_derive::{Act, SubBatch}; -use border_py_gym_env::GymDiscreteAct; -use border_tch_agent::TensorBatch; -use ndarray::ArrayD; -use std::convert::TryFrom; -use tch::Tensor; - -#[derive(Debug, Clone)] -struct Obs(ArrayD); - -#[derive(SubBatch)] -struct ObsBatch(TensorBatch); - -impl From for Tensor { - fn from(value: Obs) -> Self { - Tensor::try_from(&value.0).unwrap() - } -} - -impl From for ObsBatch { - fn from(obs: Obs) -> Self { - let tensor = obs.into(); - Self(TensorBatch::from_tensor(tensor)) - } -} - -#[derive(Clone, Debug, Act)] -struct Act(GymDiscreteAct); - -#[derive(SubBatch)] -struct ActBatch(TensorBatch); - -impl From for ActBatch { - fn from(act: Act) -> Self { - let tensor = act.into(); - Self(TensorBatch::from_tensor(tensor)) - } -} - -fn main() {} diff --git a/border-derive/src/act.rs b/border-derive/src/act.rs index f18566f3..1ddf9ea5 100644 --- a/border-derive/src/act.rs +++ b/border-derive/src/act.rs @@ -56,7 +56,8 @@ fn py_gym_env_cont_act( .iter() .map(|x| *x as usize) .collect::>(); - let act: Vec = t.into(); + use std::convert::TryInto; + let act: Vec = t.try_into().unwrap(); let act = ndarray::Array1::::from(act).into_shape(ndarray::IxDyn(&shape)).unwrap(); @@ -121,7 +122,8 @@ fn py_gym_env_disc_act( impl From for #ident { fn from(t: tch::Tensor) -> Self { - let data: Vec = t.into(); + use std::convert::TryInto; + let data: Vec = t.try_into().unwrap(); let data: Vec<_> = data.iter().map(|e| *e as i32).collect(); #ident(GymDiscreteAct::new(data)) } diff --git a/border-derive/src/lib.rs b/border-derive/src/lib.rs index 020fc23d..e5874c76 100644 --- a/border-derive/src/lib.rs +++ b/border-derive/src/lib.rs @@ -1,9 +1,207 @@ -//! Derive macros for making newtypes of types that implements -//! `border_core::Obs`, `border_core::Act` and -//! `order_core::replay_buffer::SubBatch`. +//! Derive macros for implementing [`border_core::Act`] and +//! [`border_core::generic_replay_buffer::BatchBase`]. //! -//! These macros will implements some conversion traits for combining -//! interfaces of an environment and an agent. +//! # Examples +//! +//! 
+//! ## Newtype for [`BorderAtariAct`]
+//!
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_atari_env::BorderAtariAct;
+//! #
+//! #[derive(Clone, Debug, Act)]
+//! struct MyAct(BorderAtariAct);
+//! ```
+//!
+//! The above code will generate the following implementation:
+//!
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_atari_env::BorderAtariAct;
+//! #
+//! #[derive(Clone, Debug)]
+//! struct MyAct(BorderAtariAct);
+//! impl border_core::Act for MyAct {
+//!     fn len(&self) -> usize {
+//!         self.0.len()
+//!     }
+//! }
+//! impl Into<BorderAtariAct> for MyAct {
+//!     fn into(self) -> BorderAtariAct {
+//!         self.0
+//!     }
+//! }
+//! /// The following code is generated when features="tch" is enabled.
+//! impl From<MyAct> for tch::Tensor {
+//!     fn from(act: MyAct) -> tch::Tensor {
+//!         let v = vec![act.0.act as i64];
+//!         let t: tch::Tensor = std::convert::TryFrom::<Vec<i64>>::try_from(v).unwrap();
+//!         t.unsqueeze(0)
+//!     }
+//! }
+//! impl From<tch::Tensor> for MyAct {
+//!     fn from(t: tch::Tensor) -> Self {
+//!         let data: Vec<i64> = {
+//!             let t = t.to_dtype(tch::Kind::Int64, false, true);
+//!             let n = t.numel();
+//!             let mut data = vec![0i64; n];
+//!             t.f_copy_data(&mut data, n).unwrap();
+//!             data
+//!         };
+//!         MyAct(BorderAtariAct::new(data[0] as u8))
+//!     }
+//! }
+//! ```
+//!
+//! ## Newtype for [`GymContinuousAct`]
+//!
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_py_gym_env::GymContinuousAct;
+//! #
+//! #[derive(Clone, Debug, Act)]
+//! struct MyAct(GymContinuousAct);
+//! ```
+//!
+//! The above code will generate the following implementation:
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_py_gym_env::GymContinuousAct;
+//! #
+//! #[derive(Clone, Debug)]
+//! struct MyAct(GymContinuousAct);
+//! impl border_core::Act for MyAct {
+//!     fn len(&self) -> usize {
+//!         self.0.len()
+//!     }
+//! }
+//! impl Into<GymContinuousAct> for MyAct {
+//!     fn into(self) -> GymContinuousAct {
+//!         self.0
+//!     }
+//! }
+//! /// The following code is generated when features="tch" is enabled.
+//! impl From<MyAct> for tch::Tensor {
+//!     fn from(act: MyAct) -> tch::Tensor {
+//!         let v = act.0.act.iter().map(|e| *e as f32).collect::<Vec<f32>>();
+//!         let t: tch::Tensor = std::convert::TryFrom::<Vec<f32>>::try_from(v).unwrap();
+//!         t.unsqueeze(0)
+//!     }
+//! }
+//! impl From<tch::Tensor> for MyAct {
+//!     /// `t` must be a 1-dimensional tensor of `f32`.
+//!     fn from(t: tch::Tensor) -> Self {
+//!         let shape = t.size()[1..].iter().map(|x| *x as usize).collect::<Vec<usize>>();
+//!         use std::convert::TryInto;
+//!         let act: Vec<f32> = t.try_into().unwrap();
+//!         let act = ndarray::Array1::<f32>::from(act)
+//!             .into_shape(ndarray::IxDyn(&shape))
+//!             .unwrap();
+//!         MyAct(GymContinuousAct::new(act))
+//!     }
+//! }
+//! ```
+//!
+//! ## Newtype for [`GymDiscreteAct`]
+//!
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_py_gym_env::GymDiscreteAct;
+//! #
+//! #[derive(Clone, Debug, Act)]
+//! struct MyAct(GymDiscreteAct);
+//! ```
+//!
+//! The above code will generate the following implementation:
+//! ```
+//! # use border_core::Act;
+//! # use border_derive::Act;
+//! # use border_py_gym_env::GymDiscreteAct;
+//! #
+//! #[derive(Clone, Debug)]
+//! struct MyAct(GymDiscreteAct);
+//! impl border_core::Act for MyAct {
+//!     fn len(&self) -> usize {
+//!         self.0.len()
+//!     }
+//! }
+//! impl Into<GymDiscreteAct> for MyAct {
+//!     fn into(self) -> GymDiscreteAct {
+//!         self.0
+//!     }
+//! }
+//! impl From<MyAct> for tch::Tensor {
+//!     fn from(act: MyAct) -> tch::Tensor {
+//!         let v = act.0.act.iter().map(|e| *e as i64).collect::<Vec<i64>>();
+//!         let t: tch::Tensor = std::convert::TryFrom::<Vec<i64>>::try_from(v).unwrap();
+//!         t.unsqueeze(0)
+//!     }
+//! }
+//! impl From<tch::Tensor> for MyAct {
+//!     fn from(t: tch::Tensor) -> Self {
+//!         use std::convert::TryInto;
+//!         let data: Vec<i64> = t.try_into().unwrap();
+//!         let data: Vec<_> = data.iter().map(|e| *e as i32).collect();
+//!         MyAct(GymDiscreteAct::new(data))
+//!     }
+//! }
+//! ```
+//!
+//! ## Newtype for [`TensorBatch`]
+//!
+//! ```
+//! # use border_derive::BatchBase;
+//! # use border_tch_agent::TensorBatch;
+//! #
+//! #[derive(Clone, BatchBase)]
+//! struct MyBatch(TensorBatch);
+//! ```
+//!
+//! The above code will generate the following implementation:
+//!
+//! ```
+//! # use border_derive::BatchBase;
+//! # use border_tch_agent::TensorBatch;
+//! #
+//! #[derive(Clone)]
+//! struct MyBatch(TensorBatch);
+//! impl border_core::generic_replay_buffer::BatchBase for MyBatch {
+//!     fn new(capacity: usize) -> Self {
+//!         Self(TensorBatch::new(capacity))
+//!     }
+//!     fn push(&mut self, i: usize, data: Self) {
+//!         self.0.push(i, data.0)
+//!     }
+//!     fn sample(&self, ixs: &Vec<usize>) -> Self {
+//!         let buf = self.0.sample(ixs);
+//!         Self(buf)
+//!     }
+//! }
+//! impl From<TensorBatch> for MyBatch {
+//!     fn from(obs: TensorBatch) -> Self {
+//!         MyBatch(obs)
+//!     }
+//! }
+//! impl From<MyBatch> for tch::Tensor {
+//!     fn from(b: MyBatch) -> Self {
+//!         b.0.into()
+//!     }
+//! }
+//! ```
+//!
+//! [`border_core::Obs`]: border_core::Obs
+//! [`border_core::Act`]: border_core::Act
+//! [`border_core::generic_replay_buffer::BatchBase`]: border_core::generic_replay_buffer::BatchBase
+//! [`BorderAtariAct`]: border_atari_env::BorderAtariAct
+//! [`GymContinuousAct`]: border_py_gym_env::GymContinuousAct
+//! [`GymDiscreteAct`]: border_py_gym_env::GymDiscreteAct
+//! [`TensorBatch`]: border_tch_agent::TensorBatch
+
 mod act;
 mod obs;
 mod subbatch;
@@ -11,18 +209,19 @@ use proc_macro::{self, TokenStream};
 
 /// Implements `border_core::Obs` for the newtype that wraps
 /// PyGymEnvObs or BorderAtariObs.
+#[deprecated]
 #[proc_macro_derive(Obs, attributes(my_trait))]
 pub fn derive1(input: TokenStream) -> TokenStream {
     obs::derive(input)
 }
 
-/// Implements `border_core::generic_replay_buffer::BatchBase` for the newtype.
+/// Implements [`border_core::generic_replay_buffer::BatchBase`] for the newtype.
 #[proc_macro_derive(BatchBase, attributes(my_trait))]
 pub fn derive2(input: TokenStream) -> TokenStream {
     subbatch::derive(input)
 }
 
-/// Implements `border_core::Act` for the newtype.
+/// Implements [`border_core::Act`] for the newtype.
 #[proc_macro_derive(Act, attributes(my_trait))]
 pub fn derive3(input: TokenStream) -> TokenStream {
     act::derive(input)
diff --git a/border-derive/src/obs.rs b/border-derive/src/obs.rs
index 8b45dda3..a63c4d7c 100644
--- a/border-derive/src/obs.rs
+++ b/border-derive/src/obs.rs
@@ -5,12 +5,11 @@ use syn::{parse_macro_input, DeriveInput};
 
 pub fn derive(input: TokenStream) -> TokenStream {
     let input = parse_macro_input!(input);
-    // let opts = Opts::from_derive_input(&input).expect("Wrong options");
     let DeriveInput { ident, data, ..
    } = input;
 
     let field_type = get_field_type(data);
     let field_type_str = get_type_str(
         field_type.clone(),
-        "The item for deriving Obs must be a new type like MyObs(PyGymEnvObs)",
+        "The item for deriving Obs must be a new type like MyObs(BorderAtariObs)",
     );
 
     // let output = if field_type_str == "PyGymEnvObs" {
diff --git a/border-policy-no-backend/Cargo.toml b/border-policy-no-backend/Cargo.toml
new file mode 100644
index 00000000..53bf671d
--- /dev/null
+++ b/border-policy-no-backend/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "border-policy-no-backend"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
+readme = "README.md"
+
+[dependencies]
+border-core = { version = "0.0.7", path = "../border-core" }
+border-tch-agent = { version = "0.0.7", path = "../border-tch-agent", optional = true }
+serde = { workspace = true, features = ["derive"] }
+log = { workspace = true }
+anyhow = { workspace = true }
+tch = { workspace = true, optional = true }
+
+[dev-dependencies]
+tempdir = { workspace = true }
+tch = { workspace = true }
+
+
+[features]
+border-tch-agent = ["dep:border-tch-agent", "dep:tch"]
diff --git a/border-policy-no-backend/src/lib.rs b/border-policy-no-backend/src/lib.rs
new file mode 100644
index 00000000..93053528
--- /dev/null
+++ b/border-policy-no-backend/src/lib.rs
@@ -0,0 +1,6 @@
+//! Policy with no backend.
+mod mat;
+mod mlp;
+
+pub use mat::Mat;
+pub use mlp::Mlp;
diff --git a/border-policy-no-backend/src/mat.rs b/border-policy-no-backend/src/mat.rs
new file mode 100644
index 00000000..5a429cd8
--- /dev/null
+++ b/border-policy-no-backend/src/mat.rs
@@ -0,0 +1,107 @@
+//! A matrix object.
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
+pub struct Mat {
+    pub data: Vec<f32>,
+    pub shape: Vec<i32>,
+}
+
+#[cfg(feature = "border-tch-agent")]
+impl From<tch::Tensor> for Mat {
+    fn from(x: tch::Tensor) -> Self {
+        let shape: Vec<i32> = x.size().iter().map(|e| *e as i32).collect();
+        let (n, shape) = match shape.len() {
+            1 => (shape[0] as usize, vec![shape[0], 1]),
+            2 => ((shape[0] * shape[1]) as usize, shape),
+            _ => panic!("Invalid matrix size: {:?}", shape),
+        };
+        let mut data: Vec<f32> = vec![0f32; n];
+        x.f_copy_data(&mut data, n).unwrap();
+        Self { data, shape }
+    }
+}
+
+impl Mat {
+    pub fn matmul(&self, x: &Mat) -> Self {
+        let (m, l, n) = (
+            self.shape[0] as usize,
+            self.shape[1] as usize,
+            x.shape[1] as usize,
+        );
+        let mut data = vec![0.0f32; m * n];
+        for i in 0..m {
+            for j in 0..n {
+                let kk = i * n + j;
+                for k in 0..l {
+                    data[kk] += self.data[i * l + k] * x.data[k * n + j];
+                }
+            }
+        }
+
+        Self {
+            shape: vec![m as _, n as _],
+            data,
+        }
+    }
+
+    pub fn add(&self, x: &Mat) -> Self {
+        if self.shape[0] != x.shape[0] || self.shape[1] != x.shape[1] {
+            panic!(
+                "Trying to add matrices of different sizes: {:?}",
+                (&self.shape, &x.shape)
+            );
+        }
+
+        let data = self
+            .data
+            .iter()
+            .zip(x.data.iter())
+            .map(|(a, b)| *a + *b)
+            .collect();
+
+        Mat {
+            data,
+            shape: self.shape.clone(),
+        }
+    }
+
+    pub fn relu(&self) -> Self {
+        let data = self
+            .data
+            .iter()
+            .map(|a| match *a < 0. {
+                true => 0.,
+                false => *a,
+            })
+            .collect();
+
+        Self {
+            data,
+            shape: self.shape.clone(),
+        }
+    }
+
+    pub fn empty() -> Self {
+        Self {
+            data: vec![],
+            shape: vec![0, 0],
+        }
+    }
+
+    pub fn shape(&self) -> &Vec<i32> {
+        &self.shape
+    }
+
+    pub fn new(data: Vec<f32>, shape: Vec<i32>) -> Self {
+        Self { data, shape }
+    }
+}
+
+impl From<Vec<f32>> for Mat {
+    fn from(x: Vec<f32>) -> Self {
+        let shape = vec![x.len() as i32, 1];
+        Self { shape, data: x }
+    }
+}
diff --git a/border-policy-no-backend/src/mlp.rs b/border-policy-no-backend/src/mlp.rs
new file mode 100644
index 00000000..8805a50d
--- /dev/null
+++ b/border-policy-no-backend/src/mlp.rs
@@ -0,0 +1,44 @@
+use crate::Mat;
+use serde::{Deserialize, Serialize};
+
+#[cfg(feature = "border-tch-agent")]
+use tch::nn::VarStore;
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+/// Multilayer perceptron with ReLU activation function.
+pub struct Mlp {
+    /// Weights of layers.
+    ws: Vec<Mat>,
+
+    /// Biases of layers.
+    bs: Vec<Mat>,
+}
+
+impl Mlp {
+    pub fn forward(&self, x: &Mat) -> Mat {
+        let n_layers = self.ws.len();
+        let mut x = x.clone();
+        for i in 0..n_layers {
+            x = self.ws[i].matmul(&x).add(&self.bs[i]);
+            if i != n_layers - 1 {
+                x = x.relu();
+            }
+        }
+        x
+    }
+
+    #[cfg(feature = "border-tch-agent")]
+    pub fn from_varstore(vs: &VarStore, w_names: &[&str], b_names: &[&str]) -> Self {
+        let vars = vs.variables();
+        let ws: Vec<Mat> = w_names
+            .iter()
+            .map(|name| vars[&name.to_string()].copy().into())
+            .collect();
+        let bs: Vec<Mat> = b_names
+            .iter()
+            .map(|name| vars[&name.to_string()].copy().into())
+            .collect();
+
+        Self { ws, bs }
+    }
+}
diff --git a/border-policy-no-backend/tests/test.rs b/border-policy-no-backend/tests/test.rs
new file mode 100644
index 00000000..f1b66b07
--- /dev/null
+++ b/border-policy-no-backend/tests/test.rs
@@ -0,0 +1,24 @@
+use border_policy_no_backend::Mat;
+use tch::Tensor;
+
+#[test]
+fn test_matmul() {
+    let x1 = Tensor::from_slice2(&[&[1.0f32, 2., 3.], &[4., 5., 6.]]);
+    let y1 = Tensor::from_slice(&[7.0f32, 8., 9.]);
+    let z1 = x1.matmul(&y1);
+
+    let x2: Mat = x1.into();
+    let y2: Mat = y1.into();
+    let z2 = x2.matmul(&y2);
+
+    let z3 = {
+        let mut data = vec![0.0f32; 2];
+        z1.f_copy_data(&mut data, 2).unwrap();
+        Mat {
+            shape: vec![2, 1],
+            data,
+        }
+    };
+
+    assert_eq!(z2, z3)
+}
diff --git a/border-py-gym-env/src/act/continuous_filter.rs b/border-py-gym-env/src/act/continuous_filter.rs
index eb0350cf..68251c92 100644
--- a/border-py-gym-env/src/act/continuous_filter.rs
+++ b/border-py-gym-env/src/act/continuous_filter.rs
@@ -22,7 +22,7 @@ impl Default for ContinuousActFilterConfig {
 
 /// Raw filter for continuous actions.
 ///
-/// Type `A` must implements `Into<ArrayD<f32>>`
+/// Type `A` must implement `Into<ArrayD<f32>>`.
 #[derive(Clone, Debug)]
 pub struct ContinuousActFilter<A> {
     // `true` indicates that this filter is used in a vectorized environment.
diff --git a/border-py-gym-env/src/act_c.rs b/border-py-gym-env/src/act_c.rs
deleted file mode 100644
index 574f1c13..00000000
--- a/border-py-gym-env/src/act_c.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-//! Continuous action for [`GymEnv`](crate::GymEnv).
-mod base;
-pub use base::GymContinuousAct;
-use ndarray::ArrayD;
-use numpy::PyArrayDyn;
-use pyo3::{IntoPy, PyObject};
-
-/// Convert [`ArrayD<f32>`] to [`PyObject`].
-///
-/// This function does not support batch action.
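Editor's aside (not part of the patch): a quick numeric check of the `Mat` arithmetic introduced above, mirroring `tests/test.rs` but without the tch dependency.

```rust
use border_policy_no_backend::Mat;

fn main() {
    // Row-major 2x3 matrix times a length-3 column vector; From<Vec<f32>>
    // yields shape [3, 1], so matmul produces a [2, 1] result.
    let w = Mat::new(vec![1., 2., 3., 4., 5., 6.], vec![2, 3]);
    let x: Mat = vec![7.0f32, 8., 9.].into();
    let y = w.matmul(&x).relu();
    assert_eq!(y.shape(), &vec![2, 1]);
    assert_eq!(y.data, vec![50., 122.]); // rows: 1*7+2*8+3*9, 4*7+5*8+6*9
}
```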
-pub fn to_pyobj(act: ArrayD<f32>) -> PyObject {
-    // let act = act.remove_axis(ndarray::Axis(0));
-    pyo3::Python::with_gil(|py| {
-        let act = PyArrayDyn::<f32>::from_array(py, &act);
-        act.into_py(py)
-    })
-}
diff --git a/border-py-gym-env/src/act_c/base.rs b/border-py-gym-env/src/act_c/base.rs
deleted file mode 100644
index 4919bd58..00000000
--- a/border-py-gym-env/src/act_c/base.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-use border_core::Act;
-use ndarray::ArrayD;
-use std::fmt::Debug;
-
-/// Represents an action.
-#[derive(Clone, Debug)]
-pub struct GymContinuousAct {
-    /// Stores an action.
-    pub act: ArrayD<f32>,
-}
-
-impl GymContinuousAct {
-    /// Constructs an action.
-    pub fn new(act: ArrayD<f32>) -> Self {
-        Self { act }
-    }
-}
-
-impl Act for GymContinuousAct {
-    fn len(&self) -> usize {
-        let shape = self.act.shape();
-        if shape.len() == 1 {
-            1
-        } else {
-            shape[0]
-        }
-    }
-}
diff --git a/border-py-gym-env/src/act_d.rs b/border-py-gym-env/src/act_d.rs
deleted file mode 100644
index 7cac4219..00000000
--- a/border-py-gym-env/src/act_d.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-//! Discrete action for [`GymEnv`](crate::GymEnv).
-mod base;
-pub use base::GymDiscreteAct;
diff --git a/border-py-gym-env/src/act_d/base.rs b/border-py-gym-env/src/act_d/base.rs
deleted file mode 100644
index 5afb829b..00000000
--- a/border-py-gym-env/src/act_d/base.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-use border_core::Act;
-use std::fmt::Debug;
-
-/// Represents action.
-#[derive(Clone, Debug)]
-pub struct GymDiscreteAct {
-    pub act: Vec<i32>,
-}
-
-impl GymDiscreteAct {
-    /// Constructs a discrete action.
-    pub fn new(act: Vec<i32>) -> Self {
-        Self { act }
-    }
-}
-
-impl Act for GymDiscreteAct {
-    fn len(&self) -> usize {
-        self.act.len()
-    }
-}
diff --git a/border-py-gym-env/src/atari.rs b/border-py-gym-env/src/atari.rs
index 6b64f1ad..cd3b130e 100644
--- a/border-py-gym-env/src/atari.rs
+++ b/border-py-gym-env/src/atari.rs
@@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize};
 #[derive(Debug, Serialize, Deserialize)]
 /// Specifies training or evaluation mode.
 #[derive(Clone)]
+// TODO: consider removing this enum
 pub enum AtariWrapper {
     /// Training mode
     Train,
diff --git a/border-py-gym-env/src/base.rs b/border-py-gym-env/src/base.rs
index 11eb19f2..e0d09954 100644
--- a/border-py-gym-env/src/base.rs
+++ b/border-py-gym-env/src/base.rs
@@ -22,6 +22,8 @@ pub struct GymInfo {}
 impl Info for GymInfo {}
 
 /// Convert [`PyObject`] to [`GymEnv`]::Obs with a preprocessing.
+///
+/// [`PyObject`]: https://docs.rs/pyo3/0.14.5/pyo3/type.PyObject.html
 pub trait GymObsFilter<O: Obs> {
     /// Configuration.
     type Config: Clone + Default + Serialize + DeserializeOwned;
@@ -50,7 +52,7 @@ pub trait GymObsFilter<O: Obs> {
 
 /// Convert [`GymEnv`]::Act to [`PyObject`] with a preprocessing.
 ///
-/// This trait should support vectorized environments.
+/// [`PyObject`]: https://docs.rs/pyo3/0.14.5/pyo3/type.PyObject.html
 pub trait GymActFilter<A: Act> {
     /// Configuration.
     type Config: Clone + Default + Serialize + DeserializeOwned;
@@ -79,7 +81,7 @@ pub trait GymActFilter<A: Act> {
     }
 }
 
-/// An environment in [OpenAI gym](https://github.com/openai/gym).
+/// A wrapper of [Gymnasium](https://gymnasium.farama.org).
 #[derive(Debug)]
 pub struct GymEnv<O, A, OF, AF>
 where
diff --git a/border-py-gym-env/src/lib.rs b/border-py-gym-env/src/lib.rs
index c84f0612..80c4b101 100644
--- a/border-py-gym-env/src/lib.rs
+++ b/border-py-gym-env/src/lib.rs
@@ -4,34 +4,43 @@
 //! It has been tested on some of [classic control](https://gymnasium.farama.org/environments/classic_control/) and
 //! [Gymnasium-Robotics](https://robotics.farama.org) environments.
 //!
-//! ```note
-//! In a past, [`Atari`](https://gym.openai.com/envs/#atari), and
-//! [`PyBullet`](https://github.com/benelot/pybullet-gym) environments were supported.
-//! However, currently they are not tested.
-//! ```
-//!
-//! This wrapper accepts array-like observation and action
-//! ([`Box`](https://github.com/openai/gym/blob/master/gym/spaces/box.py) spaces), and
-//! discrete action. In order to interact with Python interpreter where gym is running,
-//! [`GymObsFilter`] and [`GymActFilter`] provides interfaces for converting Python object
-//! (numpy array) to/from ndarrays in Rust. [`GymObsFilter`],
-//! [`ContinuousActFilter`] and [`DiscreteActFilter`] do the conversion for environments
-//! where observation and action are arrays. In addition to the data conversion between Python and Rust,
-//! we can implements arbitrary preprocessing in these filters. For example, [`FrameStackFilter`] keeps
-//! four consevutive observation frames (images) and outputs a stack of these frames.
-//!
-//! For Atari environments, a tweaked version of
-//! [`atari_wrapper.py`](https://github.com/taku-y/border/blob/main/examples/atari_wrappers.py)
-//! is required to be in `PYTHONPATH`. The frame stacking preprocessing is implemented in
-//! [`FrameStackFilter`] as an [`GymObsFilter`].
-//!
-//! Examples with a random controller ([`Policy`](border_core::Policy)) are in
-//! [`examples`](https://github.com/taku-y/border/blob/main/border-py-gym-env/examples) directory.
-//! Examples with `border-tch-agents`, which are collections of RL agents implemented with tch-rs,
-//! are in [here](https://github.com/taku-y/border/blob/main/border/examples).
+//! In order to bridge Python and Rust, we need to convert Python objects to Rust objects and vice versa.
+//!
+//! ## Observation
+//!
+//! An observation is created in Python and passed to Rust as a Python object. In order to convert
+//! a Python object to a Rust object, this crate provides the [`GymObsFilter`] trait. This trait has
+//! the [`GymObsFilter::filt`] method, which converts a Python object to a Rust object.
+//! The type of the Rust object after conversion corresponds to the type parameter `O` of the trait,
+//! which is also the type of the observation in the environment, i.e., [`GymEnv`]`::Obs`.
+//!
+//! There are two built-in implementations of [`GymObsFilter`]: [`ArrayObsFilter`] and [`ArrayDictObsFilter`].
+//! [`ArrayObsFilter`] is for environments where the observation is an array (e.g., CartPole).
+//! Internally, the Python object is converted to an [`ndarray::ArrayD`].
+//! Then, the array is converted to the type parameter `O` of the filter.
+//! Since `O` must implement [`From`]`<ArrayD<T2>>` by trait bound, the conversion is done
+//! by calling `array.into()`.
+//!
+//! [`ArrayDictObsFilter`] is for environments where the observation is a dictionary of arrays (e.g., FetchPickAndPlace).
+//! Internally, the dictionary is converted to `Vec<(String, border_py_gym_env::util::Array)>` from the Python object.
+//! Then, `Vec<(String, border_py_gym_env::util::Array)>` is converted to `O` by calling `into()`.
+//!
+//! ## Action
+//!
+//! An action is created in a [`Policy`] and passed to Python as a Python object. In order to convert
+//! a Rust object to a Python object, this crate provides the [`GymActFilter`] trait. This trait has
+//! the [`GymActFilter::filt`] method, which converts a Rust object of type `A` (the type parameter
+//! of the trait) to a Python object.
+//!
+//! There are two built-in implementations of [`GymActFilter`]: [`DiscreteActFilter`] and [`ContinuousActFilter`].
+//! [`DiscreteActFilter`] is for environments where the action is discrete (e.g., CartPole).
+//! This filter converts `A` to [`Vec`]`<i32>` and then to a Python object.
+//! [`ContinuousActFilter`] is for environments where the action is continuous (e.g., Pendulum).
+//! This filter converts `A` to [`ArrayD`]`<f32>` and then to a Python object.
+//!
+//! [`Policy`]: border_core::Policy
+//! [`ArrayD`]: https://docs.rs/ndarray/0.15.1/ndarray/type.ArrayD.html
 mod act;
-mod act_c;
-mod act_d;
 mod atari;
 mod base;
 mod config;
@@ -41,14 +50,10 @@ mod vec;
 pub use act::{
     ContinuousActFilter, ContinuousActFilterConfig, DiscreteActFilter, DiscreteActFilterConfig,
 };
-pub use act_c::{to_pyobj, GymContinuousAct};
-pub use act_d::GymDiscreteAct;
-pub use atari::AtariWrapper;
+use atari::AtariWrapper;
 pub use base::{GymActFilter, GymEnv, GymInfo, GymObsFilter};
 pub use config::GymEnvConfig;
 #[allow(deprecated)]
 pub use obs::{
     ArrayDictObsFilter, ArrayDictObsFilterConfig, ArrayObsFilter, ArrayObsFilterConfig,
-    FrameStackFilter, FrameStackFilterConfig, GymObs,
 };
-// pub use vec::{PyVecGymEnv, PyVecGymEnvConfig};
diff --git a/border-py-gym-env/src/obs.rs b/border-py-gym-env/src/obs.rs
index 7da2730e..f40a5d3e 100644
--- a/border-py-gym-env/src/obs.rs
+++ b/border-py-gym-env/src/obs.rs
@@ -1,10 +1,5 @@
 //! Observation for [`GymEnv`](crate::GymEnv).
 mod array_dict_filter;
 mod array_filter;
-mod base;
-mod frame_stack_filter;
 pub use array_dict_filter::{ArrayDictObsFilter, ArrayDictObsFilterConfig};
 pub use array_filter::{ArrayObsFilter, ArrayObsFilterConfig};
-#[allow(deprecated)]
-pub use base::GymObs;
-pub use frame_stack_filter::{FrameStackFilter, FrameStackFilterConfig};
diff --git a/border-py-gym-env/src/obs/array_filter.rs b/border-py-gym-env/src/obs/array_filter.rs
index 16a5d899..0cb4023a 100644
--- a/border-py-gym-env/src/obs/array_filter.rs
+++ b/border-py-gym-env/src/obs/array_filter.rs
@@ -24,6 +24,8 @@ impl Default for ArrayObsFilterConfig {
 /// An observation filter that converts a PyObject of a numpy array.
 ///
 /// Type parameter `O` must implement [`From`]`<ArrayD<T2>>` and [`border_core::Obs`].
+///
+/// [`border_core::Obs`]: border_core::Obs
 pub struct ArrayObsFilter<T1, T2, O> {
     /// Marker.
     pub phantom: PhantomData<(T1, T2, O)>,
diff --git a/border-py-gym-env/src/obs/base.rs b/border-py-gym-env/src/obs/base.rs
deleted file mode 100644
index b72fb680..00000000
--- a/border-py-gym-env/src/obs/base.rs
+++ /dev/null
@@ -1,119 +0,0 @@
-use crate::util::pyobj_to_arrayd;
-use border_core::Obs;
-use ndarray::{ArrayD, IxDyn};
-use num_traits::cast::AsPrimitive;
-use numpy::Element;
-use pyo3::PyObject;
-use std::fmt::Debug;
-use std::marker::PhantomData;
-#[cfg(feature = "tch")]
-use {std::convert::TryFrom, tch::Tensor};
-
-/// Observation represented by an [ndarray::ArrayD].
-///
-/// `S` is the shape of an observation, except for batch and process dimensions.
-/// `T` is the dtype of ndarray in the Python gym environment.
-/// For some reason, the dtype of observations in Python gym environments seems to
-/// vary, f32 or f64. To get observations in Rust side, the dtype is specified as a
-/// type parameter, instead of checking the dtype of Python array at runtime.
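Editor's aside before the deleted files below: to make the conversion chain documented above concrete, here is a hedged sketch of a custom observation newtype that would plug into `ArrayObsFilter`. The `Obs` trait methods follow the border-core 0.0.7 signatures used elsewhere in this patch; the type and field names are hypothetical.

```rust
use ndarray::ArrayD;

#[derive(Clone, Debug)]
struct MyObs(ArrayD<f32>);

// ArrayObsFilter converts the Python ndarray into ArrayD<T2> and then calls
// `.into()`, so the target type needs a From<ArrayD<T2>> impl.
impl From<ArrayD<f32>> for MyObs {
    fn from(a: ArrayD<f32>) -> Self {
        Self(a)
    }
}

impl border_core::Obs for MyObs {
    fn dummy(_n: usize) -> Self {
        Self(ArrayD::zeros(ndarray::IxDyn(&[0])))
    }
    fn len(&self) -> usize {
        self.0.shape()[0]
    }
}

// With these impls, `ArrayObsFilter<f32, f32, MyObs>` can serve as the
// observation filter of a `GymEnv`.
```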
-#[deprecated]
-#[derive(Clone, Debug)]
-pub struct GymObs<T1, T2>
-where
-    T1: Element + Debug,
-    T2: 'static + Copy,
-{
-    pub obs: ArrayD<T2>,
-    pub(crate) phantom: PhantomData<T1>,
-}
-
-#[allow(deprecated)]
-impl<T1, T2> From<ArrayD<T2>> for GymObs<T1, T2>
-where
-    T1: Element + Debug,
-    T2: 'static + Copy,
-{
-    fn from(obs: ArrayD<T2>) -> Self {
-        Self {
-            obs,
-            phantom: PhantomData,
-        }
-    }
-}
-
-#[allow(deprecated)]
-impl<T1, T2> Obs for GymObs<T1, T2>
-where
-    T1: Debug + Element,
-    T2: 'static + Copy + Debug + num_traits::Zero,
-{
-    fn dummy(_n_procs: usize) -> Self {
-        // let shape = &mut S::shape().to_vec();
-        // shape.insert(0, n_procs as _);
-        // trace!("Shape of TchPyGymEnvObs: {:?}", shape);
-        let shape = vec![0];
-        Self {
-            obs: ArrayD::zeros(IxDyn(&shape[..])),
-            phantom: PhantomData,
-        }
-    }
-
-    fn len(&self) -> usize {
-        self.obs.shape()[0]
-    }
-}
-
-/// Convert numpy array of Python into [`GymObs`].
-#[allow(deprecated)]
-impl<T1, T2> From<PyObject> for GymObs<T1, T2>
-where
-    T1: Element + AsPrimitive<T2> + std::fmt::Debug,
-    T2: 'static + Copy,
-{
-    fn from(obs: PyObject) -> Self {
-        Self {
-            obs: pyobj_to_arrayd::<T1, T2>(obs),
-            phantom: PhantomData,
-        }
-    }
-}
-
-// #[cfg(feature = "tch")]
-// impl<S, T1, T2> From<PyGymEnvObs<S, T1, T2>> for Tensor
-// where
-//     S: Shape,
-//     T1: Element + Debug,
-//     T2: 'static + Copy,
-// {
-//     fn from(obs: PyGymEnvObs<S, T1, T2>) -> Tensor {
-//         let tmp = &obs.obs;
-//         Tensor::try_from(tmp).unwrap()
-//         // Tensor::try_from(&obs.obs).unwrap()
-//     }
-// }
-
-#[allow(deprecated)]
-#[cfg(feature = "tch")]
-impl<T1> From<GymObs<T1, f32>> for Tensor
-where
-    T1: Element + Debug,
-{
-    fn from(obs: GymObs<T1, f32>) -> Tensor {
-        let tmp = &obs.obs;
-        Tensor::try_from(tmp).unwrap()
-        // Tensor::try_from(&obs.obs).unwrap()
-    }
-}
-
-#[allow(deprecated)]
-#[cfg(feature = "tch")]
-impl<T1> From<GymObs<T1, i64>> for Tensor
-where
-    T1: Element + Debug,
-{
-    fn from(obs: GymObs<T1, i64>) -> Tensor {
-        let tmp = &obs.obs;
-        Tensor::try_from(tmp).unwrap()
-        // Tensor::try_from(&obs.obs).unwrap()
-    }
-}
diff --git a/border-py-gym-env/src/obs/frame_stack_filter.rs b/border-py-gym-env/src/obs/frame_stack_filter.rs
deleted file mode 100644
index 4f42f0de..00000000
--- a/border-py-gym-env/src/obs/frame_stack_filter.rs
+++ /dev/null
@@ -1,250 +0,0 @@
-//! An observation filter with stacking observations (frames).
-#[allow(deprecated)]
-use super::GymObs;
-use crate::GymObsFilter;
-use border_core::{
-    record::{Record, RecordValue},
-    Obs,
-};
-use ndarray::{ArrayD, Axis, SliceInfoElem}; //, SliceOrIndex};
-// use ndarray::{stack, ArrayD, Axis, IxDyn, SliceInfo, SliceInfoElem};
-use num_traits::cast::AsPrimitive;
-use numpy::{Element, PyArrayDyn};
-use pyo3::{PyAny, PyObject};
-// use pyo3::{types::PyList, Py, PyAny, PyObject};
-use serde::{Deserialize, Serialize};
-use std::{fmt::Debug, marker::PhantomData};
-// use std::{convert::TryFrom, fmt::Debug, marker::PhantomData};
-
-#[allow(deprecated)]
-#[derive(Debug, Serialize, Deserialize)]
-/// Configuration of [FrameStackFilter].
-#[derive(Clone)]
-pub struct FrameStackFilterConfig {
-    n_procs: i64,
-    n_stack: i64,
-    vectorized: bool,
-}
-
-impl Default for FrameStackFilterConfig {
-    fn default() -> Self {
-        Self {
-            n_procs: 1,
-            n_stack: 4,
-            vectorized: false,
-        }
-    }
-}
-
-/// An observation filter with stacking sequence of original observations.
-///
-/// The first element of the shape `S` denotes the number of stacks (`n_stack`) and the following elements
-/// denote the shape of the partial observation, which is the observation of each environment
-/// in the vectorized environment.
-#[allow(deprecated)]
-#[derive(Debug)]
-pub struct FrameStackFilter<T1, T2, U>
-where
-    T1: Element + Debug + num_traits::identities::Zero + AsPrimitive<T2>,
-    T2: 'static + Copy + num_traits::Zero,
-    U: Obs + From<GymObs<T1, T2>>,
-{
-    // Each element in the vector corresponds to a process.
-    buffers: Vec<Option<ArrayD<T2>>>,
-
-    #[allow(dead_code)]
-    n_procs: i64,
-
-    n_stack: i64,
-
-    shape: Option<Vec<usize>>,
-
-    // Vectorized environment is not supported
-    vectorized: bool,
-
-    phantom: PhantomData<(T1, U)>,
-}
-
-#[allow(deprecated)]
-impl<T1, T2, U> FrameStackFilter<T1, T2, U>
-where
-    T1: Element + Debug + num_traits::identities::Zero + AsPrimitive<T2>,
-    T2: 'static + Copy + num_traits::Zero,
-    U: Obs + From<GymObs<T1, T2>>,
-{
-    /// Returns the default configuration.
-    pub fn default_config() -> FrameStackFilterConfig {
-        FrameStackFilterConfig::default()
-    }
-
-    /// Create slice for a dynamic array: equivalent to arr[j:(j+1), ::] in numpy.
-    ///
-    /// See
-    fn s(shape: &Option<Vec<usize>>, j: usize) -> Vec<SliceInfoElem> {
-        // The first index of self.shape corresponds to stacking dimension,
-        // specific index.
-        let mut slicer = vec![SliceInfoElem::Index(j as isize)];
-
-        // For remaining dimensions, all elements will be taken.
-        let n = shape.as_ref().unwrap().len() - 1;
-        let (start, end, step) = (0, None, 1);
-
-        slicer.extend(vec![SliceInfoElem::Slice { start, end, step }; n]);
-        slicer
-    }
-
-    /// Update the buffer of the stacked observations.
-    ///
-    /// * `i` - Index of process.
-    fn update_buffer(&mut self, i: i64, obs: &ArrayD<T2>) {
-        let arr = if let Some(arr) = &mut self.buffers[i as usize] {
-            arr
-        } else {
-            let mut shape = obs.shape().to_vec();
-            self.shape = Some(shape.clone());
-            shape.insert(0, self.n_stack as _);
-            self.buffers[i as usize] = Some(ArrayD::zeros(shape));
-            self.buffers[i as usize].as_mut().unwrap()
-        };
-
-        // Shift stacks frame(j) <- frame(j - 1) for j=1,..,(n_stack - 1)
-        for j in (1..self.n_stack as usize).rev() {
-            let dst_slice = Self::s(&self.shape, j);
-            let src_slice = Self::s(&self.shape, j - 1);
-            let (mut dst, src) = arr.multi_slice_mut((dst_slice.as_slice(), src_slice.as_slice()));
-            dst.assign(&src);
-        }
-        arr.slice_mut(Self::s(&self.shape, 0).as_slice())
-            .assign(obs)
-    }
-
-    /// Fill the buffer, invoked when resetting
-    fn fill_buffer(&mut self, i: i64, obs: &ArrayD<T2>) {
-        if let Some(arr) = &mut self.buffers[i as usize] {
-            for j in (0..self.n_stack as usize).rev() {
-                let mut dst = arr.slice_mut(Self::s(&self.shape, j).as_slice());
-                dst.assign(&obs);
-            }
-        } else {
-            unimplemented!("fill_buffer() was called before receiving the first sample.");
-        }
-    }
-
-    /// Get ndarray from pyobj
-    fn get_ndarray(o: &PyAny) -> ArrayD<T2> {
-        debug_assert_eq!(o.get_type().name().unwrap(), "ndarray");
-        let o: &PyArrayDyn<T1> = o.extract().unwrap();
-        let o = o.to_owned_array();
-        let o = o.mapv(|elem| elem.as_());
-        o
-    }
-}
-
-#[allow(deprecated)]
-impl<T1, T2, U> GymObsFilter<U> for FrameStackFilter<T1, T2, U>
-where
-    T1: Element + Debug + num_traits::identities::Zero + AsPrimitive<T2>,
-    T2: 'static + Copy + num_traits::Zero + Into<f32>,
-    U: Obs + From<GymObs<T1, T2>>,
-{
-    type Config = FrameStackFilterConfig;
-
-    fn build(config: &Self::Config) -> anyhow::Result<Self>
-    where
-        Self: Sized,
-    {
-        Ok(FrameStackFilter {
-            buffers: vec![None; config.n_procs as usize],
-            n_procs: config.n_procs,
-            n_stack: config.n_stack,
-            shape: None,
-            vectorized: config.vectorized,
-            phantom: PhantomData,
-        })
-    }
-
-    fn filt(&mut self, obs: PyObject) -> (U, Record) {
-        if self.vectorized {
-            unimplemented!();
-            // // Processes the input observation to update `self.buffer`
-            // pyo3::Python::with_gil(|py| {
-            //     debug_assert_eq!(obs.as_ref(py).get_type().name().unwrap(), "list");
-
-            //     let obs: Py<PyList> = obs.extract(py).unwrap();
-
-            //     for (i, o) in (0..self.n_procs).zip(obs.as_ref(py).iter()) {
-            //         let o = Self::get_ndarray(o);
-            //         self.update_buffer(i, &o);
-            //     }
-            // });
-
-            // // Returned values
-            // let array_views: Vec<_> = self.buffer.iter().map(|a| a.view()).collect();
-            // let obs = PyGymEnvObs::from(stack(Axis(0), array_views.as_slice()).unwrap());
-            // let obs = U::from(obs);
-
-            // // TODO: add contents in the record
-            // let record = Record::empty();
-
-            // (obs, record)
-        } else {
-            // Update the buffer with obs
-            pyo3::Python::with_gil(|py| {
-                debug_assert_eq!(obs.as_ref(py).get_type().name().unwrap(), "ndarray");
-                let o = Self::get_ndarray(obs.as_ref(py));
-                self.update_buffer(0, &o);
-            });
-
-            // Returns stacked observation in the buffer
-            // img.shape() = [1, 4, 1, 84, 84]
-            // [batch_size, n_stack, color_ch, width, height]
-            let img = self.buffers[0].clone().unwrap().insert_axis(Axis(0));
-            let data = img.iter().map(|&e| e.into()).collect::<Vec<f32>>();
-            let shape = [img.shape()[3] * self.n_stack as usize, img.shape()[4]];
-
-            let obs = GymObs::from(img);
-            let obs = U::from(obs);
-
-            // TODO: add contents in the record
-            let mut record = Record::empty();
-            record.insert("frame_stack_filter_out", RecordValue::Array2(data, shape));
-
-            (obs, record)
-        }
-    }
-
-    fn reset(&mut self, obs: PyObject) -> U {
-        if self.vectorized {
-            unimplemented!();
-            // pyo3::Python::with_gil(|py| {
-            //     debug_assert_eq!(obs.as_ref(py).get_type().name().unwrap(), "list");
-
-            //     let obs: Py<PyList> = obs.extract(py).unwrap();
-
-            //     for (i, o) in (0..self.n_procs).zip(obs.as_ref(py).iter()) {
-            //         if o.get_type().name().unwrap() != "NoneType" {
-            //             let o = Self::get_ndarray(o);
-            //             self.fill_buffer(i, &o);
-            //         }
-            //     }
-            // });
-
-            // // Returned values
-            // let array_views: Vec<_> = self.buffer.iter().map(|a| a.view()).collect();
-            // O::from(stack(Axis(0), array_views.as_slice()).unwrap())
-        } else {
-            // Update the buffer if obs is not None, otherwise do nothing
-            pyo3::Python::with_gil(|py| {
-                if obs.as_ref(py).get_type().name().unwrap() != "NoneType" {
-                    debug_assert_eq!(obs.as_ref(py).get_type().name().unwrap(), "ndarray");
-                    let o = Self::get_ndarray(obs.as_ref(py));
-                    self.fill_buffer(0, &o);
-                }
-            });
-
-            // Returns stacked observation in the buffer
-            let frames = self.buffers[0].clone().unwrap().insert_axis(Axis(0));
-            U::from(GymObs::from(frames))
-        }
-    }
-}
diff --git a/border-py-gym-env/src/util.rs b/border-py-gym-env/src/util.rs
index 15740b8a..117eed69 100644
--- a/border-py-gym-env/src/util.rs
+++ b/border-py-gym-env/src/util.rs
@@ -1,3 +1,4 @@
+//! Utility functions mainly for data conversion between Python and Rust.
 use ndarray::{concatenate, ArrayD, Axis};
 use num_traits::cast::AsPrimitive;
 use numpy::{Element, PyArrayDyn};
diff --git a/border-tch-agent/src/dqn/base.rs b/border-tch-agent/src/dqn/base.rs
index b43663ca..3efa4e06 100644
--- a/border-tch-agent/src/dqn/base.rs
+++ b/border-tch-agent/src/dqn/base.rs
@@ -341,6 +341,10 @@ where
         record
     }
 
+    /// Save model parameters in the given directory.
+    ///
+    /// The parameters of the model are saved as `qnet.pt`.
+    /// The parameters of the target model are saved as `qnet_tgt.pt`.
    fn save_params<T: AsRef<Path>>(&self, path: T) -> Result<()> {
        // TODO: consider to rename the path if it already exists
        fs::create_dir_all(&path)?;
diff --git a/border-tch-agent/src/dqn/model/base.rs b/border-tch-agent/src/dqn/model/base.rs
index 4d67fa52..56180183 100644
--- a/border-tch-agent/src/dqn/model/base.rs
+++ b/border-tch-agent/src/dqn/model/base.rs
@@ -11,8 +11,12 @@ use serde::{de::DeserializeOwned, Serialize};
 use std::{marker::PhantomData, path::Path};
 use tch::{nn, Device, Tensor};
 
-#[allow(clippy::upper_case_acronyms)]
-/// Represents value functions for DQN agents.
+/// Action value function model for DQN.
+///
+/// The architecture of the model is defined by the type parameter `Q`,
+/// which should implement [`SubModel`].
+/// This takes [`SubModel::Input`] as input and outputs a tensor.
+/// The output tensor should have the same dimension as the number of actions.
 pub struct DqnModel<Q>
 where
     Q: SubModel<Output = Tensor>,
@@ -75,7 +79,7 @@ where
         }
     }
 
-    /// Outputs the action-value given an observation.
+    /// Outputs the action-value given observation(s).
     pub fn forward(&self, x: &Q::Input) -> Tensor {
         let a = self.q.forward(&x);
         debug_assert_eq!(a.size().as_slice()[1], self.out_dim);
diff --git a/border-tch-agent/src/dqn/model/config.rs b/border-tch-agent/src/dqn/model/config.rs
index 554bfa26..ce31802c 100644
--- a/border-tch-agent/src/dqn/model/config.rs
+++ b/border-tch-agent/src/dqn/model/config.rs
@@ -8,7 +8,7 @@ use std::{
 };
 
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
-/// Configuration of [DqnModel](super::DqnModel).
+/// Configuration of [`DqnModel`](super::DqnModel).
 pub struct DqnModelConfig<Q>
 where
     // Q: SubModel,
diff --git a/border-tch-agent/src/iqn/model/config.rs b/border-tch-agent/src/iqn/model/config.rs
index 19d11879..858e1dcd 100644
--- a/border-tch-agent/src/iqn/model/config.rs
+++ b/border-tch-agent/src/iqn/model/config.rs
@@ -25,7 +25,7 @@ where
 }
 
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
-/// Configuration of [IqnModel](super::IqnModel).
+/// Configuration of [`IqnModel`](super::IqnModel).
 ///
 /// The type parameter `F` represents a configuration struct of a feature extractor.
 /// The type parameter `M` represents a configuration struct of a model for merging
diff --git a/border-tch-agent/src/mlp/base.rs b/border-tch-agent/src/mlp/base.rs
index 674f426b..40e513d4 100644
--- a/border-tch-agent/src/mlp/base.rs
+++ b/border-tch-agent/src/mlp/base.rs
@@ -2,7 +2,7 @@ use super::{mlp, MlpConfig};
 use crate::model::{SubModel, SubModel2};
 use tch::{nn, nn::Module, Device, Tensor};
 
-/// Multilayer perceptron.
+/// Multilayer perceptron with ReLU activation function.
 pub struct Mlp {
     config: MlpConfig,
     device: Device,
diff --git a/border-tch-agent/src/model/base.rs b/border-tch-agent/src/model/base.rs
index 0b375fca..99753100 100644
--- a/border-tch-agent/src/model/base.rs
+++ b/border-tch-agent/src/model/base.rs
@@ -51,53 +51,61 @@ pub trait Model2: ModelBase {
     fn forward(&self, x1s: &Self::Input1, x2s: &Self::Input2) -> Self::Output;
 }
 
-/// Neural network model that can be initialized with [VarStore] and configuration.
+/// Neural network model that can be initialized with [`VarStore`] and configuration.
 ///
 /// The purpose of this trait is for modularity of neural network models.
-/// Modules, which consists a neural network, should share [VarStore].
-/// To do this, structs implementing this trait can be initialized with a given [VarStore].
-/// This trait also provide the ability to clone with a given [VarStore].
+/// Modules that make up a neural network should share a [`VarStore`].
+/// To do this, structs implementing this trait can be initialized with a given [`VarStore`].
+/// This trait also provides the ability to clone with a given [`VarStore`].
 /// The ability is useful when creating a target network, which is common in recent deep RL algorithms.
+///
+/// [`VarStore`]: https://docs.rs/tch/0.16.0/tch/nn/struct.VarStore.html
 pub trait SubModel {
-    /// Configuration from which [SubModel] is constructed.
+    /// Configuration from which [`SubModel`] is constructed.
     type Config;
 
-    /// Input of the [SubModel].
+    /// Input of the [`SubModel`].
     type Input;
 
-    /// Output of the [SubModel].
+    /// Output of the [`SubModel`].
     type Output;
 
-    /// Builds [SubModel] with [VarStore] and [SubModel::Config].
+    /// Builds [`SubModel`] with [`VarStore`] and [`SubModel::Config`].
     fn build(var_store: &VarStore, config: Self::Config) -> Self;
 
-    /// Clones [SubModel] with [VarStore].
+    /// Clones [`SubModel`] with [`VarStore`].
+    ///
+    /// [`VarStore`]: https://docs.rs/tch/0.16.0/tch/nn/struct.VarStore.html
     fn clone_with_var_store(&self, var_store: &VarStore) -> Self;
 
     /// A generalized forward function.
     fn forward(&self, input: &Self::Input) -> Self::Output;
 }
 
-/// Neural network model that can be initialized with [VarStore] and configuration.
+/// Neural network model that can be initialized with [`VarStore`] and configuration.
 ///
-/// The difference from [SubModel] is that this trait takes two inputs.
+/// The difference from [`SubModel`] is that this trait takes two inputs.
+///
+/// [`VarStore`]: https://docs.rs/tch/0.16.0/tch/nn/struct.VarStore.html
 pub trait SubModel2 {
-    /// Configuration from which [SubModel2] is constructed.
+    /// Configuration from which [`SubModel2`] is constructed.
     type Config;
 
-    /// Input of the [SubModel2].
+    /// Input of the [`SubModel2`].
     type Input1;
 
-    /// Input of the [SubModel2].
+    /// Input of the [`SubModel2`].
     type Input2;
 
-    /// Output of the [SubModel2].
+    /// Output of the [`SubModel2`].
     type Output;
 
-    /// Builds [SubModel2] with [VarStore] and [SubModel2::Config].
+    /// Builds [`SubModel2`] with [`VarStore`] and [`SubModel2::Config`].
     fn build(var_store: &VarStore, config: Self::Config) -> Self;
 
-    /// Clones [SubModel2] with [VarStore].
+    /// Clones [`SubModel2`] with [`VarStore`].
+    ///
+    /// [`VarStore`]: https://docs.rs/tch/0.16.0/tch/nn/struct.VarStore.html
     fn clone_with_var_store(&self, var_store: &VarStore) -> Self;
 
     /// A generalized forward function.
diff --git a/border-tch-agent/src/opt.rs b/border-tch-agent/src/opt.rs
index 7b48b9c4..6a077c69 100644
--- a/border-tch-agent/src/opt.rs
+++ b/border-tch-agent/src/opt.rs
@@ -60,6 +60,8 @@ impl OptimizerConfig {
 /// Optimizers.
 ///
 /// This is a thin wrapper of [tch::nn::Optimizer].
+///
+/// [tch::nn::Optimizer]: https://docs.rs/tch/0.16.0/tch/nn/struct.Optimizer.html
 pub enum Optimizer {
     /// Adam optimizer.
     Adam(Optimizer_),
diff --git a/border-tch-agent/src/sac.rs b/border-tch-agent/src/sac.rs
index bd2b31ea..bf3e8215 100644
--- a/border-tch-agent/src/sac.rs
+++ b/border-tch-agent/src/sac.rs
@@ -1,10 +1,156 @@
 //! SAC agent.
 //!
-//! Here is an example in `border/examples/sac_pendulum.rs`
+//! Here is an example of creating a SAC agent:
 //!
-//! ```rust,ignore
+//! ```no_run
+//! # use anyhow::Result;
+//! use border_core::{
+//! #     Env as Env_, Obs as Obs_, Act as Act_, Step, test::{
+//! #         TestAct as TestAct_, TestActBatch as TestActBatch_,
+//! #         TestEnv as TestEnv_,
+//! #         TestObs as TestObs_, TestObsBatch as TestObsBatch_,
+//! #     },
+//! #     record::Record,
+//! #     generic_replay_buffer::{SimpleReplayBuffer, BatchBase},
+//!     Configurable,
+//! };
+//! use border_tch_agent::{
+//!     sac::{ActorConfig, CriticConfig, Sac, SacConfig},
+//!     mlp::{Mlp, Mlp2, MlpConfig},
+//!     opt::OptimizerConfig
+//! };
+//!
+//! # struct TestEnv(TestEnv_);
+//! # #[derive(Clone, Debug)]
+//! # struct TestObs(TestObs_);
+//! # #[derive(Clone, Debug)]
+//! # struct TestAct(TestAct_);
+//! # struct TestObsBatch(TestObsBatch_);
+//! # struct TestActBatch(TestActBatch_);
+//! #
+//! # impl Obs_ for TestObs {
+//! #     fn dummy(n: usize) -> Self {
+//! #         Self(TestObs_::dummy(n))
+//! #     }
+//! #
+//! #     fn len(&self) -> usize {
+//! #         self.0.len()
+//! #     }
+//! # }
+//! #
+//! # impl Into<tch::Tensor> for TestObs {
+//! #     fn into(self) -> tch::Tensor {
+//! #         unimplemented!();
+//! #     }
+//! # }
+//! #
+//! # impl BatchBase for TestObsBatch {
+//! #     fn new(n: usize) -> Self {
+//! #         Self(TestObsBatch_::new(n))
+//! #     }
+//! #
+//! #     fn push(&mut self, ix: usize, data: Self) {
+//! #         self.0.push(ix, data.0);
+//! #     }
+//! #
+//! #     fn sample(&self, ixs: &Vec<usize>) -> Self {
+//! #         Self(self.0.sample(ixs))
+//! #     }
+//! # }
+//! #
+//! # impl BatchBase for TestActBatch {
+//! #     fn new(n: usize) -> Self {
+//! #         Self(TestActBatch_::new(n))
+//! #     }
+//! #
+//! #     fn push(&mut self, ix: usize, data: Self) {
+//! #         self.0.push(ix, data.0);
+//! #     }
+//! #
+//! #     fn sample(&self, ixs: &Vec<usize>) -> Self {
+//! #         Self(self.0.sample(ixs))
+//! #     }
+//! # }
+//! #
+//! # impl Act_ for TestAct {
+//! #     fn len(&self) -> usize {
+//! #         self.0.len()
+//! #     }
+//! # }
+//! #
+//! # impl From<tch::Tensor> for TestAct {
+//! #     fn from(t: tch::Tensor) -> Self {
+//! #         unimplemented!();
+//! #     }
+//! # }
+//! #
+//! # impl Into<tch::Tensor> for TestAct {
+//! #     fn into(self) -> tch::Tensor {
+//! #         unimplemented!();
+//! #     }
+//! # }
+//! #
+//! # impl Env_ for TestEnv {
+//! #     type Config = <TestEnv_ as Env_>::Config;
+//! #     type Obs = TestObs;
+//! #     type Act = TestAct;
+//! #     type Info = <TestEnv_ as Env_>::Info;
+//! #
+//! #     fn build(config: &Self::Config, seed: i64) -> Result<Self> {
+//! #         Ok(Self(TestEnv_::build(&config, seed).unwrap()))
+//! #     }
+//! #
+//! #     fn step(&mut self, act: &TestAct) -> (Step<Self>, Record) {
+//! #         let (step, record) = self.0.step(&act.0);
+//! #         let step = Step {
+//! #             obs: TestObs(step.obs),
+//! #             act: TestAct(step.act),
+//! #             reward: step.reward,
+//! #             is_terminated: step.is_terminated,
+//! #             is_truncated: step.is_truncated,
+//! #             info: step.info,
+//! #             init_obs: TestObs(step.init_obs),
+//! #         };
+//! #         (step, record)
+//! #     }
+//! #
+//! #     fn reset(&mut self, is_done: Option<&Vec<i8>>) -> Result<TestObs> {
+//! #         Ok(TestObs(self.0.reset(is_done).unwrap()))
+//! #     }
+//! #
+//! #     fn step_with_reset(&mut self, a: &TestAct) -> (Step<Self>, Record) {
+//! #         let (step, record) = self.0.step_with_reset(&a.0);
+//! #         let step = Step {
+//! #             obs: TestObs(step.obs),
+//! #             act: TestAct(step.act),
+//! #             reward: step.reward,
+//! #             is_terminated: step.is_terminated,
+//! #             is_truncated: step.is_truncated,
+//! #             info: step.info,
+//! #             init_obs: TestObs(step.init_obs),
+//! #         };
+//! #         (step, record)
+//! #     }
+//! #
+//! #     fn reset_with_index(&mut self, ix: usize) -> Result<TestObs> {
+//! #         Ok(TestObs(self.0.reset_with_index(ix).unwrap()))
+//! #     }
+//! # }
+//! #
+//! # type Env = TestEnv;
+//! # type ObsBatch = TestObsBatch;
+//! # type ActBatch = TestActBatch;
+//! # type ReplayBuffer = SimpleReplayBuffer<ObsBatch, ActBatch>;
+//! #
+//! const DIM_OBS: i64 = 3;
+//! const DIM_ACT: i64 = 1;
+//! const LR_ACTOR: f64 = 1e-3;
+//! const LR_CRITIC: f64 = 1e-3;
+//! const BATCH_SIZE: usize = 256;
+//!
 //! fn create_agent(in_dim: i64, out_dim: i64) -> Sac<Env, Mlp, Mlp2, ReplayBuffer> {
 //!     let device = tch::Device::cuda_if_available();
+//!
 //!     let actor_config = ActorConfig::default()
 //!         .opt_config(OptimizerConfig::Adam { lr: LR_ACTOR })
 //!         .out_dim(out_dim)
@@ -12,25 +158,13 @@
 //!     let critic_config = CriticConfig::default()
 //!         .opt_config(OptimizerConfig::Adam { lr: LR_CRITIC })
 //!         .q_config(MlpConfig::new(in_dim + out_dim, vec![64, 64], 1, true));
-//!     let sac_config = SacConfig::default()
+//!     let sac_config = SacConfig::<Mlp, Mlp2>::default()
 //!         .batch_size(BATCH_SIZE)
-//!         .min_transitions_warmup(N_TRANSITIONS_WARMUP)
 //!         .actor_config(actor_config)
 //!         .critic_config(critic_config)
 //!         .device(device);
 //!     Sac::build(sac_config)
 //! }
-//!
-//! fn train(max_opts: usize, model_dir: &str, eval_interval: usize) -> Result<()> {
-//!     let trainer = //...
-//!     let mut agent = create_agent(DIM_OBS, DIM_ACT);
-//!     let mut recorder = TensorboardRecorder::new(model_dir);
-//!     let mut evaluator = Evaluator::new(&env_config(), 0, N_EPISODES_PER_EVAL)?;
-//!
-//!     trainer.train(&mut agent, &mut recorder, &mut evaluator)?;
-//!
-//!     Ok(())
-//! }
 //! ```
 mod actor;
 mod base;
diff --git a/border-tch-agent/src/sac/actor/base.rs b/border-tch-agent/src/sac/actor/base.rs
index 3cbf5129..756bbb77 100644
--- a/border-tch-agent/src/sac/actor/base.rs
+++ b/border-tch-agent/src/sac/actor/base.rs
@@ -10,8 +10,7 @@ use serde::{de::DeserializeOwned, Serialize};
 use std::path::Path;
 use tch::{nn, Device, Tensor};
 
-#[allow(clippy::upper_case_acronyms)]
-/// Represents a stochastic policy for SAC agents.
+/// Stochastic policy for SAC agents.
 pub struct Actor<P>
 where
     P: SubModel,
@@ -36,7 +35,7 @@ where
     P: SubModel,
     P::Config: DeserializeOwned + Serialize + OutDim,
 {
-    /// Constructs [Actor].
+    /// Constructs [`Actor`].
     pub fn build(config: ActorConfig<P::Config>, device: Device) -> Result<Actor<P>> {
         let pi_config = config.pi_config.context("pi_config is not set.")?;
         let out_dim = pi_config.get_out_dim();
diff --git a/border-tch-agent/src/sac/actor/config.rs b/border-tch-agent/src/sac/actor/config.rs
index 8d026a7f..05aec6e8 100644
--- a/border-tch-agent/src/sac/actor/config.rs
+++ b/border-tch-agent/src/sac/actor/config.rs
@@ -9,7 +9,7 @@ use std::{
 
 #[allow(clippy::upper_case_acronyms)]
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
-/// Configuration of [Actor](super::Actor).
+/// Configuration of [`Actor`](super::Actor).
 pub struct ActorConfig<P> {
     pub pi_config: Option
<P>,
     pub opt_config: OptimizerConfig,
diff --git a/border-tch-agent/src/sac/base.rs b/border-tch-agent/src/sac/base.rs
index 8483cae6..03b4bffc 100644
--- a/border-tch-agent/src/sac/base.rs
+++ b/border-tch-agent/src/sac/base.rs
@@ -191,6 +191,10 @@ where
             ),
         ])
     }
+
+    pub fn get_policy_net(&self) -> &Actor<P> {
+        &self.pi
+    }
 }
 
 impl<E, Q, P, R> Policy<E> for Sac<E, Q, P, R>
diff --git a/border-tch-agent/src/sac/config.rs b/border-tch-agent/src/sac/config.rs
index fb998577..4686ee32 100644
--- a/border-tch-agent/src/sac/config.rs
+++ b/border-tch-agent/src/sac/config.rs
@@ -18,8 +18,7 @@ use std::{
 };
 use tch::Tensor;
 
-/// Constructs [Sac](super::Sac).
-#[allow(clippy::upper_case_acronyms)]
+/// Configuration of [`Sac`](super::Sac).
 #[derive(Debug, Deserialize, Serialize, PartialEq)]
 pub struct SacConfig<Q, P>
 where
diff --git a/border-tch-agent/src/sac/critic/config.rs b/border-tch-agent/src/sac/critic/config.rs
index 5d0b2d8c..20045aa4 100644
--- a/border-tch-agent/src/sac/critic/config.rs
+++ b/border-tch-agent/src/sac/critic/config.rs
@@ -9,7 +9,7 @@ use std::{
 
 #[allow(clippy::upper_case_acronyms)]
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
-/// Configuration of [Critic](super::Critic).
+/// Configuration of [`Critic`](super::Critic).
 pub struct CriticConfig<Q> {
     pub q_config: Option<Q>,
     pub opt_config: OptimizerConfig,
diff --git a/border-tch-agent/src/tensor_batch.rs b/border-tch-agent/src/tensor_batch.rs
index ebb257ee..e1c3d38c 100644
--- a/border-tch-agent/src/tensor_batch.rs
+++ b/border-tch-agent/src/tensor_batch.rs
@@ -1,7 +1,9 @@
 use border_core::generic_replay_buffer::BatchBase;
 use tch::{Device, Tensor};
 
-/// Adds capability of constructing [Tensor] with a static method.
+/// Adds capability of constructing [`Tensor`] with a static method.
+///
+/// [`Tensor`]: https://docs.rs/tch/0.16.0/tch/struct.Tensor.html
 pub trait ZeroTensor {
     /// Constructs zero tensor.
     fn zeros(shape: &[i64]) -> Tensor;
@@ -37,6 +39,8 @@ impl ZeroTensor for i64 {
 /// where `shape` is obtained from the data pushed at the first time via
 /// [`TensorBatch::push`] method. `[1..]` means that the first axis of the
 /// given data is ignored as it might be batch size.
+///
+/// [`Tensor`]: https://docs.rs/tch/0.16.0/tch/struct.Tensor.html
 pub struct TensorBatch {
     buf: Option<Tensor>,
     capacity: i64,
diff --git a/border-tch-agent/src/util.rs b/border-tch-agent/src/util.rs
index 795df663..dcc3d3e1 100644
--- a/border-tch-agent/src/util.rs
+++ b/border-tch-agent/src/util.rs
@@ -21,9 +21,11 @@ pub enum CriticLoss {
     SmoothL1,
 }
 
-/// Apply soft update on a model.
+/// Apply soft update on variables.
 ///
 /// Variables are identified by their names.
+///
+/// `dest = tau * src + (1.0 - tau) * dest`
 pub fn track<M: ModelBase>(dest: &mut M, src: &mut M, tau: f64) {
     let src = &mut src.get_var_store().variables();
     let dest = &mut dest.get_var_store().variables();
@@ -47,15 +49,16 @@ pub fn concat_slices(s1: &[i64], s2: &[i64]) -> Vec<i64> {
     v
 }
 
-/// Returns the dimension of output vectors, i.e., the number of discrete outputs.
+/// Interface for handling output dimensions.
 pub trait OutDim {
-    /// Returns the dimension of output vectors, i.e., the number of discrete outputs.
+    /// Returns the output dimension.
     fn get_out_dim(&self) -> i64;
 
     /// Sets the output dimension.
     fn set_out_dim(&mut self, v: i64);
 }
 
+/// Returns the mean and standard deviation of the parameters.
 pub fn param_stats(var_store: &VarStore) -> Record {
     let mut record = Record::empty();
 
diff --git a/border/Cargo.toml b/border/Cargo.toml
index 121e84b0..c888053c 100644
--- a/border/Cargo.toml
+++ b/border/Cargo.toml
@@ -109,6 +109,23 @@ path = "examples/mujoco/sac_mujoco_tch.rs"
 required-features = ["tch"]
 test = false
 
+[[example]]
+name = "convert_sac_policy_to_edge"
+path = "examples/gym/convert_sac_policy_to_edge.rs"
+required-features = ["border-tch-agent", "tch"]
+test = false
+
+# [[example]]
+# name = "sac_ant_async"
+# path = "examples/mujoco/sac_ant_async.rs"
+# required-features = ["tch", "border-async-trainer"]
+# test = false
+
+[[example]]
+name = "pendulum_edge"
+path = "examples/gym/pendulum_edge.rs"
+test = false
+
 # [[example]]
 # name = "sac_ant_async"
 # path = "examples/mujoco/sac_ant_async.rs"
@@ -133,6 +150,7 @@ border-derive = { version = "0.0.7", path = "../border-derive" }
 border-core = { version = "0.0.7", path = "../border-core" }
 border-tensorboard = { version = "0.0.7", path = "../border-tensorboard" }
 border-tch-agent = { version = "0.0.7", path = "../border-tch-agent" }
+border-policy-no-backend = { version = "0.0.7", path = "../border-policy-no-backend" }
 border-py-gym-env = { version = "0.0.7", path = "../border-py-gym-env" }
 border-atari-env = { version = "0.0.7", path = "../border-atari-env" }
 border-candle-agent = { version = "0.0.7", path = "../border-candle-agent" }
@@ -149,6 +167,7 @@ chrono = { workspace = true }
 tensorboard-rs = { workspace = true }
 thiserror = { workspace = true }
 serde_yaml = { workspace = true }
+bincode = { workspace = true }
 
 [package.metadata.docs.rs]
 features = ["doc-only"]
@@ -157,3 +176,4 @@ features = ["doc-only"]
 doc-only = ["tch/doc-only"]
 cuda = ["candle-core/cuda"]
 cudnn = ["candle-core/cudnn"]
+border-tch-agent = []
diff --git a/border/examples/gym/convert_sac_policy_to_edge.rs b/border/examples/gym/convert_sac_policy_to_edge.rs
new file mode 100644
index 00000000..8a7294ca
--- /dev/null
+++ b/border/examples/gym/convert_sac_policy_to_edge.rs
@@ -0,0 +1,215 @@
+use anyhow::Result;
+use border_core::{Agent, Configurable};
+use border_policy_no_backend::Mlp;
+use border_tch_agent::{
+    mlp,
+    model::ModelBase,
+    sac::{ActorConfig, CriticConfig, SacConfig},
+};
+use std::{fs, io::Write};
+
+const DIM_OBS: i64 = 3;
+const DIM_ACT: i64 = 1;
+
+// Dummy types
+mod dummy {
+    use super::mlp::{Mlp, Mlp2};
+    use border_tch_agent::sac::Sac as Sac_;
+
+    #[derive(Clone, Debug)]
+    pub struct DummyObs;
+
+    impl border_core::Obs for DummyObs {
+        fn dummy(_n: usize) -> Self {
+            unimplemented!();
+        }
+
+        fn len(&self) -> usize {
+            unimplemented!();
+        }
+    }
+
+    impl Into<tch::Tensor> for DummyObs {
+        fn into(self) -> tch::Tensor {
+            unimplemented!();
+        }
+    }
+
+    #[derive(Clone, Debug)]
+    pub struct DummyAct;
+
+    impl border_core::Act for DummyAct {
+        fn len(&self) -> usize {
+            unimplemented!();
+        }
+    }
+
+    impl Into<tch::Tensor> for DummyAct {
+        fn into(self) -> tch::Tensor {
+            unimplemented!();
+        }
+    }
+
+    impl From<tch::Tensor> for DummyAct {
+        fn from(_value: tch::Tensor) -> Self {
+            unimplemented!();
+        }
+    }
+
+    #[derive(Clone)]
+    pub struct DummyInnerBatch;
+
+    impl Into<tch::Tensor> for DummyInnerBatch {
+        fn into(self) -> tch::Tensor {
+            unimplemented!();
+        }
+    }
+
+    pub struct DummyBatch;
+
+    impl border_core::TransitionBatch for DummyBatch {
+        type ObsBatch = DummyInnerBatch;
+        type ActBatch = DummyInnerBatch;
+
+        fn len(&self) -> usize {
+            unimplemented!();
+        }
+
+        fn obs(&self) -> &Self::ObsBatch {
+            unimplemented!();
+        }
+
+        fn unpack(
+            self,
+        ) -> (
+            Self::ObsBatch,
+            Self::ActBatch,
+            Self::ObsBatch,
+            Vec<f32>,
+            Vec<i8>,
+            Vec<i8>,
+            Option<Vec<usize>>,
+            Option<Vec<f32>>,
+        ) {
+            unimplemented!();
+        }
+    }
+
+    pub struct DummyReplayBuffer;
+
+    impl border_core::ReplayBufferBase for DummyReplayBuffer {
+        type Batch = DummyBatch;
+        type Config = usize;
+
+        fn batch(&mut self, _size: usize) -> anyhow::Result<Self::Batch> {
+            unimplemented!();
+        }
+
+        fn build(_config: &Self::Config) -> Self {
+            unimplemented!();
+        }
+
+        fn update_priority(&mut self, _ixs: &Option<Vec<usize>>, _td_err: &Option<Vec<f32>>) {
+            unimplemented!();
+        }
+    }
+
+    #[derive(Clone, Debug)]
+    pub struct DummyInfo;
+
+    impl border_core::Info for DummyInfo {}
+
+    pub struct DummyEnv;
+
+    impl border_core::Env for DummyEnv {
+        type Config = usize;
+        type Act = DummyAct;
+        type Obs = DummyObs;
+        type Info = DummyInfo;
+
+        fn build(_config: &Self::Config, _seed: i64) -> anyhow::Result<Self>
+        where
+            Self: Sized,
+        {
+            unimplemented!();
+        }
+
+        fn reset(&mut self, _is_done: Option<&Vec<i8>>) -> anyhow::Result<Self::Obs> {
+            unimplemented!();
+        }
+
+        fn reset_with_index(&mut self, _ix: usize) -> anyhow::Result<Self::Obs> {
+            unimplemented!();
+        }
+
+        fn step(&mut self, _a: &Self::Act) -> (border_core::Step<Self>, border_core::record::Record)
+        where
+            Self: Sized,
+        {
+            unimplemented!();
+        }
+
+        fn step_with_reset(
+            &mut self,
+            _a: &Self::Act,
+        ) -> (border_core::Step<Self>, border_core::record::Record)
+        where
+            Self: Sized,
+        {
+            unimplemented!();
+        }
+    }
+
+    pub type Env = DummyEnv;
+    pub type Sac = Sac_<Env, Mlp, Mlp2, DummyReplayBuffer>;
+}
+
+use dummy::Sac;
+
+fn create_sac_config() -> SacConfig<mlp::Mlp, mlp::Mlp2> {
+    // Omit learning related parameters
+    let actor_config = ActorConfig::default()
+        .out_dim(DIM_ACT)
+        .pi_config(mlp::MlpConfig::new(DIM_OBS, vec![64, 64], DIM_ACT, false));
+    let critic_config = CriticConfig::default().q_config(mlp::MlpConfig::new(
+        DIM_OBS + DIM_ACT,
+        vec![64, 64],
+        1,
+        false,
+    ));
+    SacConfig::default()
+        .actor_config(actor_config)
+        .critic_config(critic_config)
+        .device(tch::Device::Cpu)
+}
+
+fn main() -> Result<()> {
+    let src_path = "./border/examples/gym/model/tch/sac_pendulum/best";
+    let dest_path = "./border/examples/gym/model/edge/sac_pendulum/best/mlp.bincode";
+
+    // Load Sac model
+    let sac = {
+        let config = create_sac_config();
+        let mut sac = Sac::build(config);
+        sac.load_params(src_path)?;
+        sac
+    };
+
+    // Create Mlp
+    let mlp = {
+        let vs = sac.get_policy_net().get_var_store();
+        let w_names = ["mlp.al0.weight", "mlp.al1.weight", "ml.weight"];
+        let b_names = ["mlp.al0.bias", "mlp.al1.bias", "ml.bias"];
+        Mlp::from_varstore(vs, &w_names, &b_names)
+    };
+
+    // Serialize to file
+    let encoded = bincode::serialize(&mlp)?;
+    let mut file = fs::OpenOptions::new()
+        .create(true)
+        .write(true)
+        .open(&dest_path)?;
+    file.write_all(&encoded)?;
+
+    Ok(())
+}
diff --git a/border/examples/gym/pendulum_edge.rs b/border/examples/gym/pendulum_edge.rs
new file mode 100644
index 00000000..e81b4f97
--- /dev/null
+++ b/border/examples/gym/pendulum_edge.rs
@@ -0,0 +1,190 @@
+use anyhow::Result;
+use border_core::{DefaultEvaluator, Evaluator as _};
+use border_policy_no_backend::{Mat, Mlp};
+use border_py_gym_env::{
+    ArrayObsFilter, ContinuousActFilter, GymActFilter, GymEnv, GymEnvConfig, GymObsFilter,
+};
+use clap::Parser;
+use ndarray::ArrayD;
+use std::fs;
+
+type PyObsDtype = f32;
+
+mod obs_act_types {
+    use super::*;
+
+    #[derive(Clone, Debug)]
+    /// Observation type.
diff --git a/border/examples/gym/pendulum_edge.rs b/border/examples/gym/pendulum_edge.rs
new file mode 100644
index 00000000..e81b4f97
--- /dev/null
+++ b/border/examples/gym/pendulum_edge.rs
@@ -0,0 +1,190 @@
+use anyhow::Result;
+use border_core::{DefaultEvaluator, Evaluator as _};
+use border_policy_no_backend::{Mat, Mlp};
+use border_py_gym_env::{
+    ArrayObsFilter, ContinuousActFilter, GymActFilter, GymEnv, GymEnvConfig, GymObsFilter,
+};
+use clap::Parser;
+use ndarray::ArrayD;
+use std::fs;
+
+type PyObsDtype = f32;
+
+mod obs_act_types {
+    use super::*;
+
+    #[derive(Clone, Debug)]
+    /// Observation type.
+    pub struct Obs(Mat);
+
+    impl border_core::Obs for Obs {
+        fn dummy(_n: usize) -> Self {
+            Self(Mat::empty())
+        }
+
+        fn len(&self) -> usize {
+            self.0.shape()[0] as _
+        }
+    }
+
+    impl From<ArrayD<f32>> for Obs {
+        fn from(obs: ArrayD<f32>) -> Self {
+            let obs = obs.t().to_owned();
+            let shape = obs.shape().iter().map(|e| *e as i32).collect();
+            let data = obs.into_raw_vec();
+            Self(Mat::new(data, shape))
+        }
+    }
+
+    impl From<Obs> for Mat {
+        fn from(obs: Obs) -> Mat {
+            obs.0
+        }
+    }
+
+    #[derive(Clone, Debug)]
+    pub struct Act(Mat);
+
+    impl border_core::Act for Act {}
+
+    impl From<Act> for ArrayD<f32> {
+        fn from(value: Act) -> Self {
+            // let shape: Vec<_> = value.0.shape.iter().map(|e| *e as usize).collect();
+            let shape = vec![(value.0.shape[0] * value.0.shape[1]) as usize];
+            // let data = value.0.data;
+            let data: Vec<f32> = value.0.data.iter().map(|e| 2f32 * *e).collect();
+            let t = ArrayD::from_shape_vec(shape, data).unwrap();
+            t
+        }
+    }
+
+    impl Into<Act> for Mat {
+        fn into(self) -> Act {
+            Act(self)
+        }
+    }
+}
+
+mod policy {
+    use std::{io::Read, path::Path};
+
+    use super::*;
+    use border_core::Policy;
+
+    pub struct MlpPolicy {
+        mlp: Mlp,
+    }
+
+    impl Policy<Env> for MlpPolicy {
+        fn sample(&mut self, obs: &Obs) -> Act {
+            self.mlp.forward(&obs.clone().into()).into()
+        }
+    }
+
+    impl MlpPolicy {
+        pub fn from_serialized_path(path: impl AsRef<Path>) -> Result<Self> {
+            let mut file = fs::OpenOptions::new().read(true).open(&path)?;
+            let mut buf = Vec::<u8>::new();
+            let _ = file.read_to_end(&mut buf).unwrap();
+            let mlp: Mlp = bincode::deserialize(&buf[..])?;
+            Ok(Self { mlp })
+        }
+    }
+}
+
+use obs_act_types::*;
+use policy::*;
+
+type ObsFilter = ArrayObsFilter<PyObsDtype, f32, Obs>;
+type ActFilter = ContinuousActFilter<Act>;
+type Env = GymEnv<Obs, Act, ObsFilter, ActFilter>;
+type Evaluator = DefaultEvaluator<Env, MlpPolicy>;
+
+fn env_config() -> GymEnvConfig<Obs, Act, ObsFilter, ActFilter> {
+    GymEnvConfig::<Obs, Act, ObsFilter, ActFilter>::default()
+        .name("Pendulum-v1".to_string())
+        .obs_filter_config(ObsFilter::default_config())
+        .act_filter_config(ActFilter::default_config())
+}
+
+fn eval(n_episodes: usize, render: bool) -> Result<()> {
+    let env_config = {
+        let mut env_config = env_config();
+        if render {
+            env_config = env_config
+                .render_mode(Some("human".to_string()))
+                .set_wait_in_millis(10);
+        };
+        env_config
+    };
+    let mut policy = MlpPolicy::from_serialized_path(
+        "./border/examples/gym/model/edge/sac_pendulum/best/mlp.bincode",
+    )?;
+
+    let _ = Evaluator::new(&env_config, 0, n_episodes)?.evaluate(&mut policy);
+
+    Ok(())
+}
+
+/// Train/eval SAC agent in pendulum environment
+#[derive(Parser, Debug)]
+#[command(version, about)]
+struct Args {
+    /// Train SAC agent, not evaluate
+    #[arg(short, long, default_value_t = false)]
+    train: bool,
+
+    /// Evaluate SAC agent, not train
+    #[arg(short, long, default_value_t = false)]
+    eval: bool,
+
+    /// Log metrics with MLflow
+    #[arg(short, long, default_value_t = false)]
+    mlflow: bool,
+}
+
+fn main() -> Result<()> {
+    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
+    let _ = eval(5, true)?;
+
+    // let args = Args::parse();
+
+    // if args.train {
+    //     train(
+    //         MAX_OPTS,
+    //         "./border/examples/gym/model/tch/sac_pendulum",
+    //         EVAL_INTERVAL,
+    //         args.mlflow,
+    //     )?;
+    // } else if args.eval {
+    //     eval(5, true, "./border/examples/gym/model/tch/sac_pendulum/best")?;
+    // } else {
+    //     train(
+    //         MAX_OPTS,
+    //         "./border/examples/gym/model/tch/sac_pendulum",
+    //         EVAL_INTERVAL,
+    //         args.mlflow,
+    //     )?;
+    //     eval(5, true, "./border/examples/gym/model/tch/sac_pendulum/best")?;
+    // }
+
+    Ok(())
+}
+
+// #[cfg(test)]
+// mod test {
+//     use super::*;
+//     use tempdir::TempDir;
+
+//     #[test]
+//     fn test_sac_pendulum() -> Result<()> {
+//         tch::manual_seed(42);
+
+//         let model_dir = TempDir::new("sac_pendulum_tch")?;
+//         let model_dir = model_dir.path().to_str().unwrap();
+//         train(100, model_dir, 100, false)?;
+//         eval(1, false, (model_dir.to_string() + "/best").as_str())?;
+
+//         Ok(())
+//     }
+// }
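Two conventions in `pendulum_edge.rs` are worth spelling out: observations are transposed into a column-shaped `Mat` (features along the first axis) before the forward pass, and `From<Act> for ArrayD<f32>` multiplies the policy output by 2, mapping the squashed action from [-1, 1] onto Pendulum-v1's torque range of [-2, 2]. Since the deserialized policy depends on neither tch nor Python, it can also be queried with no gym environment at all, which is the point of the edge workflow. A minimal sketch (hypothetical, not part of this patch; it reuses only the `Mat`/`Mlp` calls appearing above):

```rust
use border_policy_no_backend::{Mat, Mlp};

fn main() -> anyhow::Result<()> {
    // Read back the policy written by the convert_sac_policy_to_edge example.
    let buf = std::fs::read("./border/examples/gym/model/edge/sac_pendulum/best/mlp.bincode")?;
    let mlp: Mlp = bincode::deserialize(&buf)?;

    // One Pendulum-v1 observation (cos theta, sin theta, theta_dot),
    // packed as a 3x1 column like the From<ArrayD<f32>> impl above.
    let obs = Mat::new(vec![1.0, 0.0, 0.0], vec![3, 1]);

    // Forward pass; the squashed output still needs the 2x rescaling
    // before it could be fed to the environment as a torque.
    let act = mlp.forward(&obs);
    println!("action shape (pre-scaling): {:?}", act.shape());
    Ok(())
}
```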
diff --git a/docker/aarch64_doc/doc.sh b/docker/aarch64_doc/doc.sh
index 4cf37c27..c823279f 100644
--- a/docker/aarch64_doc/doc.sh
+++ b/docker/aarch64_doc/doc.sh
@@ -3,4 +3,4 @@ docker run -it --rm \
    --shm-size=512m \
    --volume="$(pwd)/../..:/home/ubuntu/border" \
    border_headless bash -l -c \
-    "cd /home/ubuntu/border; CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
+    "cd /home/ubuntu/border; source /root/venv/bin/activate; LIBTORCH_USE_PYTORCH=1 LD_LIBRARY_PATH=$HOME/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
diff --git a/docker/aarch64_headless/Dockerfile b/docker/aarch64_headless/Dockerfile
index ace7c4a9..c3fc15b0 100644
--- a/docker/aarch64_headless/Dockerfile
+++ b/docker/aarch64_headless/Dockerfile
@@ -57,7 +57,7 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN cd /root && python3 -m venv venv
RUN source /root/venv/bin/activate && pip3 install --upgrade pip
RUN source /root/venv/bin/activate && pip3 install pyyaml typing-extensions
-RUN source /root/venv/bin/activate && pip3 install torch==1.12.0
+RUN source /root/venv/bin/activate && pip3 install torch==2.3.0
RUN source /root/venv/bin/activate && pip3 install ipython jupyterlab
RUN source /root/venv/bin/activate && pip3 install numpy==1.21.3
RUN source /root/venv/bin/activate && pip3 install mujoco==2.3.7
diff --git a/docker/amd64/Dockerfile b/docker/amd64/Dockerfile
index 744586a7..fdbecf3b 100644
--- a/docker/amd64/Dockerfile
+++ b/docker/amd64/Dockerfile
@@ -73,7 +73,7 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN cd /root && python3 -m venv venv
RUN source /root/venv/bin/activate && pip3 install --upgrade pip
RUN source /root/venv/bin/activate && pip3 install pyyaml typing-extensions
-RUN source /root/venv/bin/activate && pip3 install torch==1.13.1
+RUN source /root/venv/bin/activate && pip3 install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu --timeout 300
RUN source /root/venv/bin/activate && pip3 install ipython jupyterlab
RUN source /root/venv/bin/activate && pip3 install numpy==1.21.3
RUN source /root/venv/bin/activate && pip3 install mujoco==2.3.7
@@ -95,12 +95,6 @@ RUN cd $HOME && mkdir -p .border/model
# # PyBulletGym
# RUN source /home/ubuntu/venv/bin/activate && pip3 install pybullet==3.2.5
# RUN source /home/ubuntu/venv/bin/activate && pip3 install pybullet==2.7.1
-# RUN source /home/ubuntu/venv/bin/activate && \
-#     cd $HOME && \
-#     git clone https://github.com/bulletphysics/bullet3.git && \
-#     cd bullet3 && \
-#     git checkout -b tmp 2c204c49e56ed15ec5fcfa71d199ab6d6570b3f5 && \
-#     ./build_cmake_pybullet_double.sh
# RUN cd $HOME && \
#     git clone https://github.com/benelot/pybullet-gym.git && \
#     cd pybullet-gym && \
@@ -121,7 +115,7 @@ RUN echo 'export CARGO_TARGET_DIR=$HOME/target' >> ~/.bashrc
RUN echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin' >> ~/.bashrc
RUN echo 'export MUJOCO_GL=glfw' >> ~/.bashrc
RUN echo 'source $HOME/venv/bin/activate' >> ~/.bashrc
-RUN echo 'export RUSTFLAGS="-C target-feature=+fp16"' >> ~/.bashrc
+# RUN echo 'export RUSTFLAGS="-C target-feature=+fp16"' >> ~/.bashrc

RUN rm /bin/sh && mv /bin/sh_tmp /bin/sh
diff --git a/docker/amd64/build.sh b/docker/amd64/build.sh
index 0eb76e0e..0936264c 100644
--- a/docker/amd64/build.sh
+++ b/docker/amd64/build.sh
@@ -1,2 +1,3 @@
#!/bin/bash
docker build -t border .
+#podman build -t border .
diff --git a/docker/amd64/remove.sh b/docker/amd64/remove.sh
index e7a325bc..3872196d 100644
--- a/docker/amd64/remove.sh
+++ b/docker/amd64/remove.sh
@@ -1 +1,2 @@
docker rm -f border
+#podman rm -f border
diff --git a/docker/amd64/run.sh b/docker/amd64/run.sh
index c02762ca..be2f43f3 100644
--- a/docker/amd64/run.sh
+++ b/docker/amd64/run.sh
@@ -1,13 +1,13 @@
#!/bin/bash
-# nvidia-docker run -it --rm \
-#     --env="DISPLAY" \
-#     --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
-#     --volume="/home/taku-y:/home/taku-y" \
-#     --name my_pybullet my_pybullet bash
+nvidia-docker run -it --rm \
+    --env="DISPLAY" \
+    --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
+    --volume="/home/taku-y:/home/taku-y" \
+    --name my_pybullet my_pybullet bash

-docker run -td \
-    --name border \
-    -p 6080:6080 \
-    --shm-size=512m \
-    --volume="$(pwd)/../..:/root/border" \
-    border
+# podman run -td \
+#     --name border \
+#     -p 6080:6080 \
+#     --shm-size=512m \
+#     --volume="$(pwd)/../..:/root/border" \
+#     border
diff --git a/docker/amd64_headless/Dockerfile b/docker/amd64_headless/Dockerfile
index d460674d..c61e3ea0 100644
--- a/docker/amd64_headless/Dockerfile
+++ b/docker/amd64_headless/Dockerfile
@@ -1,13 +1,16 @@
-FROM ubuntu:focal-20221130
+FROM --platform=linux/amd64 ubuntu:22.04

ENV DEBIAN_FRONTEND noninteractive
RUN echo "Set disable_coredump false" >> /etc/sudo.conf
RUN apt-get update -q && \
    apt-get upgrade -yq && \
-    apt-get install -yq wget curl git build-essential vim sudo libssl-dev
-
-# lsb-release locales bash-completion tzdata gosu && \
-# RUN rm -rf /var/lib/apt/lists/*
+    apt-get install -yq wget
+RUN apt-get install -yq curl
+RUN apt-get install -yq git
+RUN apt-get install -yq build-essential
+RUN apt-get install -yq vim
+# RUN apt-get install -yq sudo
+RUN apt-get install -yq libssl-dev

# clang
RUN apt install -y -q libclang-dev
@@ -18,7 +21,7 @@ RUN apt update -y && \
    DEBIAN_FRONTEND=noninteractive && \
    apt install -y -q --no-install-recommends \
    libsdl2-dev libsdl2-image-dev libsdl2-mixer-dev libsdl2-net-dev libsdl2-ttf-dev \
-    libsdl-dev libsdl-image1.2-dev
+    libsdl-image1.2-dev libsdl1.2-dev

# zip
RUN apt install -y zip
@@ -27,7 +30,7 @@ RUN apt install -y zip
RUN apt install -y swig

# python
-RUN apt install -y python3.8 python3.8-dev python3.8-distutils python3.8-venv python3-pip
+RUN apt install -y python3.10 python3.10-dev python3.10-distutils python3.10-venv python3-pip

# cmake
RUN apt install -y cmake
@@ -44,32 +47,25 @@ RUN apt install -y patchelf libglfw3 libglfw3-dev
# Cleanup
RUN rm -rf /var/lib/apt/lists/*

-# COPY test_mujoco_py.py /test_mujoco_py.py
-# RUN chmod 777 /test_mujoco_py.py
-
-# Add user
-RUN useradd --create-home --home-dir /home/ubuntu --shell /bin/bash --user-group --groups adm,sudo ubuntu && \
-    echo ubuntu:ubuntu | chpasswd && \
-    echo "ubuntu ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
-
# Use bash
RUN mv /bin/sh /bin/sh_tmp && ln -s /bin/bash /bin/sh

-# User settings
-USER ubuntu
-
# rustup
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# python
-RUN cd /home/ubuntu && python3 -m venv venv
-RUN source /home/ubuntu/venv/bin/activate && pip3 install --upgrade pip
-RUN source /home/ubuntu/venv/bin/activate && pip3 install pyyaml typing-extensions
-RUN source /home/ubuntu/venv/bin/activate && pip3 install torch==1.12.0
-RUN source /home/ubuntu/venv/bin/activate && pip3 install ipython jupyterlab
-RUN source /home/ubuntu/venv/bin/activate && pip3 install numpy==1.21.3
-RUN source /home/ubuntu/venv/bin/activate && pip3 install gymnasium[box2d]==0.29.0
-RUN source /home/ubuntu/venv/bin/activate && pip3 install gymnasium-robotics==1.2.2
+RUN cd /root && python3 -m venv venv
+RUN source /root/venv/bin/activate && pip3 install --upgrade pip
+RUN source /root/venv/bin/activate && pip3 install pyyaml typing-extensions
+RUN source /root/venv/bin/activate && pip3 install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu --timeout 300
+RUN source /root/venv/bin/activate && pip3 install ipython jupyterlab
+RUN source /root/venv/bin/activate && pip3 install numpy==1.21.3
+RUN source /root/venv/bin/activate && pip3 install mujoco==2.3.7
+RUN source /root/venv/bin/activate && pip3 install gymnasium[box2d]==0.29.0
+RUN source /root/venv/bin/activate && pip3 install gymnasium-robotics==1.2.2
+RUN source /root/venv/bin/activate && pip3 install tensorboard==2.16.2
+RUN source /root/venv/bin/activate && pip3 install tabulate==0.9.0
+RUN source /root/venv/bin/activate && pip3 install mlflow-export-import==1.2.0
# RUN source /home/ubuntu/venv/bin/activate && pip3 install robosuite==1.3.2
# RUN source /home/ubuntu/venv/bin/activate && pip3 install -U 'mujoco-py<2.2,>=2.1'
# RUN source /home/ubuntu/venv/bin/activate && pip3 install dm-control==1.0.8
@@ -79,20 +75,6 @@ RUN source /home/ubuntu/venv/bin/activate && pip3 install gymnasium-robotics==1.2.2
# border
RUN cd $HOME && mkdir -p .border/model

-# Mujoco amd64 binary
-RUN cd $HOME && \
-    mkdir .mujoco && \
-    cd .mujoco && \
-    wget https://github.com/deepmind/mujoco/releases/download/2.1.1/mujoco-2.1.1-linux-x86_64.tar.gz
-RUN cd $HOME/.mujoco && \
-    tar zxf mujoco-2.1.1-linux-x86_64.tar.gz && \
-    mkdir -p mujoco210/bin && \
-    ln -sf $PWD/mujoco-2.1.1/lib/libmujoco.so.2.1.1 $PWD/mujoco210/bin/libmujoco210.so && \
-    ln -sf $PWD/mujoco-2.1.1/lib/libglewosmesa.so $PWD/mujoco210/bin/libglewosmesa.so && \
-    ln -sf $PWD/mujoco-2.1.1/include/ $PWD/mujoco210/include && \
-    ln -sf $PWD/mujoco-2.1.1/model/ $PWD/mujoco210/model
-# RUN cp /*.py $HOME
-
# # PyBulletGym
# RUN source /home/ubuntu/venv/bin/activate && pip3 install pybullet==3.2.5
# # RUN source /home/ubuntu/venv/bin/activate && pip3 install pybullet==2.7.1
@@ -106,22 +88,22 @@
# RUN sed -i 's/return state, sum(self.rewards), bool(done), {}/return state, sum(self.rewards), bool(done), bool(done), {}/g' /home/ubuntu/pybullet-gym/pybulletgym/envs/roboschool/envs/locomotion/walker_base_env.py
# RUN sed -i 's/id='\''AntPyBulletEnv-v0'\'',/id='\''AntPyBulletEnv-v0'\'', order_enforce=False,/g' /home/ubuntu/pybullet-gym/pybulletgym/envs/__init__.py

-# Env vars
-# RUN echo 'export LIBTORCH=$HOME/.local/lib/python3.8/site-packages/torch' >> ~/.bashrc
-# RUN echo 'export LD_LIBRARY_PATH=$LIBTORCH/lib' >> ~/.bashrc
-# RUN echo 'export PYTHONPATH=$HOME/border/border-py-gym-env/examples:$PYTHONPATH' >> ~/.bashrc
-# RUN echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin' >> ~/.bashrc
+# .bashrc
+RUN echo 'export LIBTORCH=$HOME/venv/lib/python3.10/site-packages/torch' >> ~/.bashrc
+RUN echo 'export LD_LIBRARY_PATH=$LIBTORCH/lib' >> ~/.bashrc
+RUN echo 'export LIBTORCH_CXX11_ABI=0' >> ~/.bashrc
RUN echo 'export PATH=$HOME/.local/bin:$PATH' >> ~/.bashrc
-ENV LIBTORCH_CXX11_ABI 0
-ENV LIBTORCH /home/ubuntu/venv/lib/python3.8/site-packages/torch
-ENV LD_LIBRARY_PATH $LIBTORCH/lib
-ENV PYTHONPATH /home/ubuntu/border/border-py-gym-env/examples:$PYTHONPATH
+RUN echo 'export PYTHONPATH=$HOME/border/border-py-gym-env/examples:$PYTHONPATH' >> ~/.bashrc
+RUN echo 'export CARGO_TARGET_DIR=$HOME/target' >> ~/.bashrc
+RUN echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin' >> ~/.bashrc
+RUN echo 'export MUJOCO_GL=glfw' >> ~/.bashrc
+RUN echo 'source $HOME/venv/bin/activate' >> ~/.bashrc
+RUN echo 'export RUSTFLAGS="-C target-feature=+fp16"' >> ~/.bashrc

-USER root
RUN rm /bin/sh && mv /bin/sh_tmp /bin/sh

-USER ubuntu
-WORKDIR /home/ubuntu/border
+# USER root
+# WORKDIR /home/ubuntu/border
# ENV USER ubuntu
# CMD ["/bin/bash", "-l", "-c"]
diff --git a/docker/amd64_headless/build.sh b/docker/amd64_headless/build.sh
index 860261b3..86de24a2 100644
--- a/docker/amd64_headless/build.sh
+++ b/docker/amd64_headless/build.sh
@@ -1,2 +1,3 @@
#!/bin/bash
docker build -t border_headless .
+#podman build -t border_headless .
diff --git a/docker/amd64_headless/doc.sh b/docker/amd64_headless/doc.sh
index ac4d6098..ec32f92f 100644
--- a/docker/amd64_headless/doc.sh
+++ b/docker/amd64_headless/doc.sh
@@ -3,4 +3,11 @@ docker run -it --rm \
    --shm-size=512m \
    --volume="$(pwd)/../..:/home/ubuntu/border" \
    border_headless bash -l -c \
-    "CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
+    "cd /home/ubuntu/border; source /root/venv/bin/activate; LIBTORCH_USE_PYTORCH=1 CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
+
+# podman run -it --rm \
+#     --name border_headless \
+#     --shm-size=512m \
+#     --volume="$(pwd)/../..:/home/ubuntu/border" \
+#     border_headless bash -l -c \
+#     "cd /home/ubuntu/border; source /root/venv/bin/activate; LIBTORCH_USE_PYTORCH=1 CARGO_TARGET_DIR=/home/ubuntu/target cargo doc --no-deps --document-private-items; cp -r /home/ubuntu/target/doc ."
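Context for the container changes above: this release bumps tch to 0.16.0, which expects libtorch 2.3, so every image now installs `torch==2.3.0` (the amd64 variants from the CPU wheel index with a longer timeout). The doc builds additionally set `LIBTORCH_USE_PYTORCH=1`, which tells tch-rs's build script to reuse the libtorch bundled with the venv's PyTorch wheel instead of a standalone download, and `LIBTORCH_CXX11_ABI=0` matches that wheel's ABI. The podman command variants are kept commented out next to their docker counterparts.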