Merge pull request #104 from laboroai/dev_0_0_7

Ver. 0.0.7
laboroai · Sep 1, 2024 · f55d1fd · f55d1fd
2 parents 6dda454 + c3f7b0e
commit f55d1fd
Show file tree

Hide file tree

Showing 325 changed files with 15,017 additions and 5,882 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -14,8 +14,8 @@ jobs:
       strategy:
         matrix:
           os: [ubuntu-latest, windows-latest, macOS-latest]
-          rust: [1.68.2]
-          python-version: [3.8]
+          rust: [1.76.0]
+          python-version: ["3.11"]
       steps:
         - uses: actions/checkout@v2
 
@@ -40,16 +40,22 @@ jobs:
         - if: matrix.os == 'ubuntu-latest'
           name: Install gym (Ubuntu)
           run: |
+            pip install --upgrade pip
+            pip install swig==4.2.1
             pip install mujoco==2.3.7
-            pip install gymnasium[box2d]==0.29.0
+            pip install gymnasium==0.29.1
             pip install gymnasium-robotics==1.2.2
             pip install pybullet==3.2.5
+            pip install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu
 
         - if: matrix.os != 'ubuntu-latest'
           name: Install gym (Windows/Mac)
           run: |
-            pip install gymnasium[box2d]==0.29.0
+            pip install --upgrade pip
+            pip install swig==4.2.1
+            pip install gymnasium==0.29.1
             pip install pybullet==3.2.5
+            pip install torch==2.3.0
 
         - if: matrix.os == 'ubuntu-latest'
           name: Install pybullet-gym
@@ -76,10 +82,28 @@ jobs:
           run: cargo test -p border-py-gym-env
 
         - if: matrix.os == 'ubuntu-latest'
-          name: Test border
+          name: Check env vars
+          run: printenv
+
+        - if: matrix.os == 'ubuntu-latest'
+          name: Test border examples
+          env:
+            LIBTORCH_USE_PYTORCH: 1
           run: |
+            export LD_LIBRARY_PATH=`pip show torch | awk '/Location/ {print $2}'`/torch/lib:$LD_LIBRARY_PATH
+            printenv | grep LD_
             sudo apt-get update
             sudo apt-get install -y --no-install-recommends --fix-missing \
               libsdl2-dev libsdl2-image-dev libsdl2-mixer-dev libsdl2-net-dev libsdl2-ttf-dev \
               libsdl2-dev libsdl-image1.2-dev
-            cargo test -p border --features=tch
+            cargo test --example dqn_cartpole_tch --features=tch
+            cargo test --example iqn_cartpole_tch --features=tch
+            cargo test --example sac_pendulum_tch --features=tch
+            cargo test --example dqn_cartpole --features=candle-core
+            cargo test --example sac_pendulum --features=candle-core
+            cd border-async-trainer; cargo test; cd ..
+            cd border-atari-env; cargo test; cd ..
+            cd border-candle-agent; cargo test; cd ..
+            cd border-tch-agent; cargo test; cd ..
+            cd border-policy-no-backend; cargo test --features=border-tch-agent; cd ..
+            cd border-py-gym-env; cargo test; cd ..
diff --git a/.gitignore b/.gitignore
@@ -35,3 +35,5 @@ __pycache__
 .vscode/**
 doc/**
 
+out/**
+mlruns/**
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,20 +1,38 @@
 # Changelog
 
-## v0.0.6 (20??-??-??)
+## v0.0.7 (20??-??-??)
+
+### Added
+
+* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2).
+* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1).
+* Add `Trainer::train_offline()` method for offline training (`border-core`) (https://github.com/taku-y/border/issues/18).
+* Add crate `border-policy-no-backend`.
+
+### Changed
+
+* Take `self` in the signature of `push()` method of replay buffer (`border-core`).
+* Fix a bug in `MlpConfig` (`border-tch-agent`).
+* Bump the version of tch to 0.16.0 (`border-tch-agent`).
+* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`).
+* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10).
+* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12).
+
+## v0.0.6 (2023-09-19)
 
 ### Added
 
 * Docker files (`border`).
-* Singularity files (`border`)
-* Script for GPUSOROBAN (#67)
+* Singularity files (`border`).
+* Script for GPUSOROBAN (#67).
 * `Evaluator` trait in `border-core` (#70). It can be used to customize evaluation logic in `Trainer`.
 * Example of asynchronous trainer for native Atari environment and DQN (`border/examples`).
-* Move tensorboard recorder into a separate crate (`border-tensorboard`)
+* Move tensorboard recorder into a separate crate (`border-tensorboard`).
 
 ### Changed
 
 * Bump the version of tch-rs to 0.8.0 (`border-tch-agent`).
 * Rename agents as following the convention in Rust (`border-tch-agent`).
-* Bump the version of gym to 0.26 (`border-py-gym-env`)
-* Remove the type parameter for array shape of gym environments (`border-py-gym-env`)
-* Interface of Python-Gym interface (`border-py-gym-env`)
+* Bump the version of gym to 0.26 (`border-py-gym-env`).
+* Remove the type parameter for array shape of gym environments (`border-py-gym-env`).
+* Interface of Python-Gym interface (`border-py-gym-env`).
diff --git a/Cargo.toml b/Cargo.toml
@@ -2,35 +2,58 @@
 members = [
     "border-core",
     "border-tensorboard",
+    "border-mlflow-tracking",
     "border-py-gym-env",
     "border-tch-agent",
+    "border-candle-agent",
     "border-derive",
     "border-atari-env",
     "border-async-trainer",
+    "border-policy-no-backend",
     "border",
 ]
 exclude = ["docker/"]
 
+[workspace.package]
+version = "0.0.7"
+edition = "2018"
+rust-version = "1.76"
+description = "Reinforcement learning library"
+repository = "https://github.com/laboroai/border"
+keywords = ["Reinforcement learning"]
+categories = ["science"]
+license = "MIT OR Apache-2.0"
+
 [workspace.dependencies]
-clap = "2.33.3"
+clap = { version = "4.5.8", features = ["derive"] }
 csv = "1.1.5"
 fastrand = "1.4.0"
-tch = "0.8.0"
+tch = "0.16.0"
 anyhow = "1.0.38"
 crossbeam-channel = "0.5.1"
 serde_yaml = "0.8.7"
 aquamarine = "0.1"
 log = "0.4"
 dirs = "3.0.2"
 thiserror = "1.0"
-serde = "=1.0.126"
+serde = "1.0.194"
+serde_json = "^1.0.114"
 numpy = "0.14.1"
 env_logger = "0.8.2"
 tempdir = "0.3.7"
 num-traits = "0.2.14"
 tensorboard-rs = "0.2.4"
-pyo3 = { version = "=0.14.5", default-features=false }
+pyo3 = { version = "=0.14.5", default-features = false }
 ndarray = "0.15.1"
 chrono = "0.4"
 segment-tree = "2.0.0"
 image = "0.23.14"
+candle-core = { version = "=0.4.1", feature = ["cuda", "cudnn"] }
+candle-nn = "0.4.1"
+rand = { version = "0.8.5", features = ["small_rng"] }
+itertools = "0.12.1"
+ordered-float = "4.2.0"
+reqwest = { version = "0.11.26", features = ["json", "blocking"] }
+xxhash-rust = { version = "0.8.10", features = ["xxh3"] }
+candle-optimisers = "0.4.0"
+bincode = "1.3.3"
diff --git a/README.md b/README.md
@@ -9,18 +9,19 @@ A reinforcement learning library in Rust.
 
 Border consists of the following crates:
 
-* [border-core](https://crates.io/crates/border-core) provides basic traits and functions generic to environments and reinforcmenet learning (RL) agents.
-* [border-tensorboard](https://crates.io/crates/border-tensorboard) has `TensorboardRecorder` struct to write records which can be shown in Tensorboard. It is based on [tensorboard-rs](https://crates.io/crates/tensorboard-rs).
-* [border-py-gym-env](https://crates.io/crates/border-py-gym-env) is a wrapper of the [Gymnasium](https://gymnasium.farama.org) environments written in Python.
-* [border-atari-env](https://crates.io/crates/border-atari-env) is a wrapper of [atari-env](https://crates.io/crates/atari-env), which is a part of [gym-rs](https://crates.io/crates/gym-rs).
-* [border-tch-agent](https://crates.io/crates/border-tch-agent) is a collection of RL agents based on [tch](https://crates.io/crates/tch), including Deep Q network (DQN), implicit quantile network (IQN), and soft actor critic (SAC).
-* [border-async-trainer](https://crates.io/crates/border-async-trainer) defines some traits and functions for asynchronous training of RL agents by multiple actors, which runs sampling processes in parallel. In each sampling process, an agent interacts with an environment to collect samples to be sent to a shared replay buffer.
-
-You can use a part of these crates for your purposes, though [border-core](https://crates.io/crates/border-core) is mandatory. [This crate](https://crates.io/crates/border) is just a collection of examples. See [Documentation](https://docs.rs/border) for more details.
-
-## News
-
-The owner of this repository will be changed from [taku-y](https://github.com/taku-y) to [laboroai](https://github.com/laboroai).
+* Core and utility
+  * [border-core](https://crates.io/crates/border-core) provides basic traits and functions generic to environments and reinforcement learning (RL) agents.
+  * [border-tensorboard](https://crates.io/crates/border-tensorboard) has `TensorboardRecorder` struct to write records which can be shown in Tensorboard. It is based on [tensorboard-rs](https://crates.io/crates/tensorboard-rs).
+  * [border-mlflow-tracking](https://crates.io/crates/border-mlflow-tracking) supports MLflow tracking to log metrics during training via REST API.
+  * [border-async-trainer](https://crates.io/crates/border-async-trainer) defines some traits and functions for asynchronous training of RL agents by multiple actors, which runs sampling processes in parallel. In each sampling process, an agent interacts with an environment to collect samples to be sent to a shared replay buffer.
+  * [border](https://crates.io/crates/border) is just a collection of examples.
+* Environment
+  * [border-py-gym-env](https://crates.io/crates/border-py-gym-env) is a wrapper of the [Gymnasium](https://gymnasium.farama.org) environments written in Python.
+  * [border-atari-env](https://crates.io/crates/border-atari-env) is a wrapper of [atari-env](https://crates.io/crates/atari-env), which is a part of [gym-rs](https://crates.io/crates/gym-rs).
+* Agent
+  * [border-tch-agent](https://crates.io/crates/border-tch-agent) includes RL agents based on [tch](https://crates.io/crates/tch), including Deep Q network (DQN), implicit quantile network (IQN), and soft actor critic (SAC).
+  * [border-candle-agent](https://crates.io/crates/border-candle-agent) includes RL agents based on [candle](https://crates.io/crates/candle-core).
+  * [border-policy-no-backend](https://crates.io/crates/border-policy-no-backend) includes a policy that is independent of any deep learning backend, such as Torch.
 
 ## Status
 
@@ -34,21 +35,17 @@ There are some example sctipts in `border/examples` directory. These are tested
 
 In `docker` directory, there are scripts for running a Docker container, in which you can try the examples described above. Currently, only `aarch64` is mainly used for the development.
 
-## Tests
-
-The following command has been tested in the Docker container running on M2 Macbook air.
-
-```bash
-cargo test --features=tch
-```
-
 ## License
 
-Crates                | License
-----------------------|------------------
-`border-core`         | MIT OR Apache-2.0
-`border-py-gym-env`   | MIT OR Apache-2.0
-`border-atari-env`    | GPL-2.0-or-later
-`border-tch-agent`    | MIT OR Apache-2.0
-`border-async-trainer`| MIT OR Apache-2.0
-`border`              | GPL-2.0-or-later
+Crates                    | License
+--------------------------|------------------
+`border-core`             | MIT OR Apache-2.0
+`border-tensorboard`      | MIT OR Apache-2.0
+`border-mlflow-tracking`  | MIT OR Apache-2.0
+`border-async-trainer`    | MIT OR Apache-2.0
+`border-py-gym-env`       | MIT OR Apache-2.0
+`border-atari-env`        | GPL-2.0-or-later
+`border-tch-agent`        | MIT OR Apache-2.0
+`border-candle-agent`     | MIT OR Apache-2.0
+`border-policy-no-backend`| MIT OR Apache-2.0
+`border`                  | GPL-2.0-or-later
diff --git a/border-async-trainer/Cargo.toml b/border-async-trainer/Cargo.toml
@@ -1,23 +1,19 @@
 [package]
 name = "border-async-trainer"
-version = "0.0.6"
-authors = ["Taku Yoshioka <yoshioka@laboro.ai>"]
-edition = "2018"
-rust-version = "1.68.2"
-
-description = "Atari environment based on gym-rs"
-repository = "https://github.com/taku-y/border"
-keywords = ["rl"]
-categories = ["science"]
-license = "MIT OR Apache-2.0"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
 readme = "README.md"
-autoexamples = false
 
 [dependencies]
 anyhow = { workspace = true }
 aquamarine = { workspace = true }
-border-core = { version = "0.0.6", path = "../border-core" }
-border-tensorboard = { version = "0.0.6", path = "../border-tensorboard" }
+border-core = { version = "0.0.7", path = "../border-core" }
+border-tensorboard = { version = "0.0.7", path = "../border-tensorboard" }
 serde = { workspace = true, features = ["derive"] }
 log = { workspace = true }
 tokio = { version = "1.14.0", features = ["full"] }

diff --git a/border-async-trainer/src/actor.rs b/border-async-trainer/src/actor.rs
@@ -2,4 +2,4 @@
 mod base;
 mod stat;
 pub use base::Actor;
-pub use stat::{ActorStat, actor_stats_fmt};
+pub use stat::{actor_stats_fmt, ActorStat};