Merge pull request #90 from taku-y/mlflow

Support Mlflow tracking
laboroai · Mar 17, 2024 · 7ce76a7 · 7ce76a7
2 parents 6dda454 + 2d4cfba
commit 7ce76a7
Show file tree

Hide file tree

Showing 42 changed files with 985 additions and 327 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,16 @@
 # Changelog
 
-## v0.0.6 (20??-??-??)
+## v0.0.7 (20??-??-??)
+
+### Added
+
+* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2).
+
+### Changed
+
+* Take `self` in the signature of `push()` method of replay buffer (`border-core`)
+
+## v0.0.6 (2023-09-19)
 
 ### Added
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -2,15 +2,27 @@
 members = [
     "border-core",
     "border-tensorboard",
+    "border-mlflow-tracking",
     "border-py-gym-env",
     "border-tch-agent",
+    "border-candle-agent",
     "border-derive",
     "border-atari-env",
     "border-async-trainer",
     "border",
 ]
 exclude = ["docker/"]
 
+[workspace.package]
+version = "0.0.6"
+edition = "2018"
+rust-version = "1.70"
+description = "Reinforcement learning library"
+repository = "https://github.com/taku-y/border"
+keywords = ["rl"]
+categories = ["science"]
+license = "MIT OR Apache-2.0"
+
 [workspace.dependencies]
 clap = "2.33.3"
 csv = "1.1.5"
@@ -23,7 +35,8 @@ aquamarine = "0.1"
 log = "0.4"
 dirs = "3.0.2"
 thiserror = "1.0"
-serde = "=1.0.126"
+serde = "1.0.194"
+serde_json = "^1.0.114"
 numpy = "0.14.1"
 env_logger = "0.8.2"
 tempdir = "0.3.7"
@@ -34,3 +47,6 @@ ndarray = "0.15.1"
 chrono = "0.4"
 segment-tree = "2.0.0"
 image = "0.23.14"
+candle-core = "0.2.2"
+candle-nn = "0.2.2"
+reqwest = { version = "0.11.26", features = ["json", "blocking"] }
diff --git a/README.md b/README.md
@@ -15,6 +15,7 @@ Border consists of the following crates:
 * [border-atari-env](https://crates.io/crates/border-atari-env) is a wrapper of [atari-env](https://crates.io/crates/atari-env), which is a part of [gym-rs](https://crates.io/crates/gym-rs).
 * [border-tch-agent](https://crates.io/crates/border-tch-agent) is a collection of RL agents based on [tch](https://crates.io/crates/tch), including Deep Q network (DQN), implicit quantile network (IQN), and soft actor critic (SAC).
 * [border-async-trainer](https://crates.io/crates/border-async-trainer) defines some traits and functions for asynchronous training of RL agents by multiple actors, which runs sampling processes in parallel. In each sampling process, an agent interacts with an environment to collect samples to be sent to a shared replay buffer.
+* [border-mlflow-tracking](https://crates.io/crates/border-mlflow-tracking) supports MLflow tracking to log metrics during training via the REST API.
 
 You can use a part of these crates for your purposes, though [border-core](https://crates.io/crates/border-core) is mandatory. [This crate](https://crates.io/crates/border) is just a collection of examples. See [Documentation](https://docs.rs/border) for more details.
 

diff --git a/border-async-trainer/Cargo.toml b/border-async-trainer/Cargo.toml
@@ -1,17 +1,13 @@
 [package]
 name = "border-async-trainer"
-version = "0.0.6"
-authors = ["Taku Yoshioka <yoshioka@laboro.ai>"]
-edition = "2018"
-rust-version = "1.68.2"
-
-description = "Atari environment based on gym-rs"
-repository = "https://github.com/taku-y/border"
-keywords = ["rl"]
-categories = ["science"]
-license = "MIT OR Apache-2.0"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
 readme = "README.md"
-autoexamples = false
 
 [dependencies]
 anyhow = { workspace = true }

diff --git a/border-atari-env/Cargo.toml b/border-atari-env/Cargo.toml
@@ -1,17 +1,13 @@
 [package]
 name = "border-atari-env"
-version = "0.0.6"
-authors = ["Taku Yoshioka <yoshioka@laboro.ai>"]
-edition = "2018"
-rust-version = "1.68.2"
-
-description = "Atari environment based on gym-rs"
-repository = "https://github.com/taku-y/border"
-keywords = ["rl"]
-categories = ["science"]
-license = "GPL-2.0-or-later"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+package.license = "GPL-2.0-or-later"
 readme = "README.md"
-autoexamples = false
 
 [dependencies]
 anyhow = { workspace = true }

diff --git a/border-atari-env/src/util/test.rs b/border-atari-env/src/util/test.rs
@@ -45,7 +45,7 @@ impl SubBatch for ObsBatch {
     }
 
     #[inline]
-    fn push(&mut self, i: usize, data: &Self) {
+    fn push(&mut self, i: usize, data: Self) {
         unsafe {
             let src: *const u8 = &data.buf[0];
             let dst: *mut u8 = &mut self.buf[i * self.m];
@@ -100,7 +100,7 @@ impl SubBatch for ActBatch {
     }
 
     #[inline]
-    fn push(&mut self, i: usize, data: &Self) {
+    fn push(&mut self, i: usize, data: Self) {
         unsafe {
             let src: *const u8 = &data.buf[0];
             let dst: *mut u8 = &mut self.buf[i * self.m];

diff --git a/border-candle-agent/Cargo.toml b/border-candle-agent/Cargo.toml
@@ -0,0 +1,34 @@
+[package]
+name = "border-candle-agent"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
+readme = "README.md"
+
+[dependencies]
+border-core = { version = "0.0.6", path = "../border-core" }
+border-async-trainer = { version = "0.0.6", path = "../border-async-trainer", optional = true }
+serde = { workspace = true, features = ["derive"] }
+serde_yaml = { workspace = true }
+tensorboard-rs = { workspace = true }
+log = { workspace = true }
+thiserror = { workspace = true }
+anyhow = { workspace = true }
+chrono = { workspace = true }
+aquamarine = { workspace = true }
+candle-core = { workspace = true }
+fastrand = { workspace = true }
+segment-tree = { workspace = true }
+
+[dev-dependencies]
+tempdir = { workspace = true }
+
+# [package.metadata.docs.rs]
+# features = ["doc-only"]
+
+# [features]
+# doc-only = ["tch/doc-only"]
diff --git a/border-candle-agent/src/lib.rs b/border-candle-agent/src/lib.rs
diff --git a/border-core/Cargo.toml b/border-core/Cargo.toml
@@ -1,17 +1,13 @@
 [package]
 name = "border-core"
-version = "0.0.6"
-authors = ["Taku Yoshioka <yoshioka@laboro.ai>"]
-edition = "2018"
-rust-version = "1.68.2"
-
-description = "Reinforcement learning library"
-repository = "https://github.com/taku-y/border"
-keywords = ["rl"]
-categories = ["science"]
-license = "MIT OR Apache-2.0"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
 readme = "README.md"
-autoexamples = false
 
 [dependencies]
 serde = { workspace = true, features = ["derive"] }

diff --git a/border-core/src/replay_buffer/base.rs b/border-core/src/replay_buffer/base.rs
@@ -108,9 +108,9 @@ where
     fn push(&mut self, tr: Self::PushedItem) -> Result<()> {
         let len = tr.len(); // batch size
         let (obs, act, next_obs, reward, is_done, _, _) = tr.unpack();
-        self.obs.push(self.i, &obs);
-        self.act.push(self.i, &act);
-        self.next_obs.push(self.i, &next_obs);
+        self.obs.push(self.i, obs);
+        self.act.push(self.i, act);
+        self.next_obs.push(self.i, next_obs);
         self.push_reward(self.i, &reward);
         self.push_is_done(self.i, &is_done);
 

diff --git a/border-core/src/replay_buffer/subbatch.rs b/border-core/src/replay_buffer/subbatch.rs
@@ -6,7 +6,7 @@ pub trait SubBatch {
     fn new(capacity: usize) -> Self;
 
     /// Pushes the samples in `data`.
-    fn push(&mut self, i: usize, data: &Self);
+    fn push(&mut self, i: usize, data: Self);
 
     /// Takes samples in the batch.
     fn sample(&self, ixs: &Vec<usize>) -> Self;

diff --git a/border-derive/Cargo.toml b/border-derive/Cargo.toml
@@ -1,17 +1,13 @@
 [package]
 name = "border-derive"
-version = "0.0.6"
-authors = ["Taku Yoshioka <yoshioka@laboro.ai>"]
-edition = "2018"
-rust-version = "1.68.2"
-
-description = "Derive macros for observation and action in RL environments of border"
-repository = "https://github.com/taku-y/border"
-keywords = ["rl"]
-categories = ["science"]
-license = "MIT OR Apache-2.0"
-# readme = "README.md"
-autoexamples = false
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
+readme = "README.md"
 
 [lib]
 proc-macro = true

diff --git a/border-derive/src/subbatch.rs b/border-derive/src/subbatch.rs
@@ -32,8 +32,8 @@ fn tensor_sub_batch(ident: proc_macro2::Ident, field_type: syn::Type) -> proc_ma
                 Self(TensorSubBatch::new(capacity))
             }
 
-            fn push(&mut self, i: usize, data: &Self) {
-                self.0.push(i, &data.0)
+            fn push(&mut self, i: usize, data: Self) {
+                self.0.push(i, data.0)
             }
 
             fn sample(&self, ixs: &Vec<usize>) -> Self {

diff --git a/border-mlflow-tracking/Cargo.toml b/border-mlflow-tracking/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "border-mlflow-tracking"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
+readme = "README.md"
+
+[dependencies]
+border-core = { version = "0.0.6", path = "../border-core" }
+reqwest = { workspace = true }
+anyhow = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+log = { workspace = true }
+serde_json = { workspace = true }
+flatten-serde-json = "0.1.0"
+
+[dev-dependencies]
+env_logger = { workspace = true }
+
+[[example]]
+name = "tracking_basic"
+# test = true
diff --git a/border-mlflow-tracking/README.md b/border-mlflow-tracking/README.md
@@ -0,0 +1,105 @@
+Support [MLflow](https://mlflow.org) tracking to manage experiments.
+
+Before running the program using this crate, run a tracking server with the following command:
+
+```bash
+mlflow server --host 127.0.0.1 --port 8080
+```
+
+Then, training configurations and metrics can be logged to the tracking server.
+The following code is an example. Nested configuration parameters are flattened
+and logged under dotted names such as `hyper_params.param1` and `hyper_params.param2`.
+
+```rust
+use anyhow::Result;
+use border_core::record::{Record, RecordValue, Recorder};
+use border_mlflow_tracking::MlflowTrackingClient;
+use serde::Serialize;
+
+// Nested Configuration struct
+#[derive(Debug, Serialize)]
+struct Config {
+    env_params: String,
+    hyper_params: HyperParameters,
+}
+
+#[derive(Debug, Serialize)]
+struct HyperParameters {
+    param1: i64,
+    param2: Param2,
+    param3: Param3,
+}
+
+#[derive(Debug, Serialize)]
+enum Param2 {
+    Variant1,
+    Variant2(f32),
+}
+
+#[derive(Debug, Serialize)]
+struct Param3 {
+    dataset_name: String,
+}
+
+fn main() -> Result<()> {
+    env_logger::init();
+
+    let config1 = Config {
+        env_params: "env1".to_string(),
+        hyper_params: HyperParameters {
+            param1: 0,
+            param2: Param2::Variant1,
+            param3: Param3 {
+                dataset_name: "a".to_string(),
+            },
+        },
+    };
+    let config2 = Config {
+        env_params: "env2".to_string(),
+        hyper_params: HyperParameters {
+            param1: 0,
+            param2: Param2::Variant2(3.0),
+            param3: Param3 {
+                dataset_name: "a".to_string(),
+            },
+        },
+    };
+
+    // Set experiment for runs
+    let client = MlflowTrackingClient::new("http://localhost:8080").set_experiment_id("Default")?;
+
+    // Create recorders for logging
+    let mut recorder_run1 = client.create_recorder("")?;
+    let mut recorder_run2 = client.create_recorder("")?;
+    recorder_run1.log_params(&config1)?;
+    recorder_run2.log_params(&config2)?;
+
+    // Logging while training
+    for opt_steps in 0..100 {
+        let opt_steps = opt_steps as f32;
+
+        // Create a record
+        let mut record = Record::empty();
+        record.insert("opt_steps", RecordValue::Scalar(opt_steps));
+        record.insert("Loss", RecordValue::Scalar((-1f32 * opt_steps).exp()));
+
+        // Log metrics in the record
+        recorder_run1.write(record);
+    }
+
+    // Logging while training
+    for opt_steps in 0..100 {
+        let opt_steps = opt_steps as f32;
+
+        // Create a record
+        let mut record = Record::empty();
+        record.insert("opt_steps", RecordValue::Scalar(opt_steps));
+        record.insert("Loss", RecordValue::Scalar((-0.5f32 * opt_steps).exp()));
+
+        // Log metrics in the record
+        recorder_run2.write(record);
+    }
+
+    Ok(())
+}
+```