Skip to content

Commit

Permalink
Merge pull request #104 from laboroai/dev_0_0_7
Browse files Browse the repository at this point in the history
Ver. 0.0.7
  • Loading branch information
taku-y committed Sep 1, 2024
2 parents 6dda454 + c3f7b0e commit f55d1fd
Show file tree
Hide file tree
Showing 325 changed files with 15,017 additions and 5,882 deletions.
36 changes: 30 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
rust: [1.68.2]
python-version: [3.8]
rust: [1.76.0]
python-version: ["3.11"]
steps:
- uses: actions/checkout@v2

Expand All @@ -40,16 +40,22 @@ jobs:
- if: matrix.os == 'ubuntu-latest'
name: Install gym (Ubuntu)
run: |
pip install --upgrade pip
pip install swig==4.2.1
pip install mujoco==2.3.7
pip install gymnasium[box2d]==0.29.0
pip install gymnasium==0.29.1
pip install gymnasium-robotics==1.2.2
pip install pybullet==3.2.5
pip install torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu
- if: matrix.os != 'ubuntu-latest'
name: Install gym (Windows/Mac)
run: |
pip install gymnasium[box2d]==0.29.0
pip install --upgrade pip
pip install swig==4.2.1
pip install gymnasium==0.29.1
pip install pybullet==3.2.5
pip install torch==2.3.0
- if: matrix.os == 'ubuntu-latest'
name: Install pybullet-gym
Expand All @@ -76,10 +82,28 @@ jobs:
run: cargo test -p border-py-gym-env

- if: matrix.os == 'ubuntu-latest'
name: Test border
name: Check env vars
run: printenv

- if: matrix.os == 'ubuntu-latest'
name: Test border examples
env:
LIBTORCH_USE_PYTORCH: 1
run: |
export LD_LIBRARY_PATH=`pip show torch | awk '/Location/ {print $2}'`/torch/lib:$LD_LIBRARY_PATH
printenv | grep LD_
sudo apt-get update
sudo apt-get install -y --no-install-recommends --fix-missing \
libsdl2-dev libsdl2-image-dev libsdl2-mixer-dev libsdl2-net-dev libsdl2-ttf-dev \
libsdl2-dev libsdl-image1.2-dev
cargo test -p border --features=tch
cargo test --example dqn_cartpole_tch --features=tch
cargo test --example iqn_cartpole_tch --features=tch
cargo test --example sac_pendulum_tch --features=tch
cargo test --example dqn_cartpole --features=candle-core
cargo test --example sac_pendulum --features=candle-core
cd border-async-trainer; cargo test; cd ..
cd border-atari-env; cargo test; cd ..
cd border-candle-agent; cargo test; cd ..
cd border-tch-agent; cargo test; cd ..
cd border-policy-no-backend; cargo test --features=border-tch-agent; cd ..
cd border-py-gym-env; cargo test; cd ..
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,5 @@ __pycache__
.vscode/**
doc/**

out/**
mlruns/**
32 changes: 25 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,38 @@
# Changelog

## v0.0.6 (20??-??-??)
## v0.0.7 (20??-??-??)

### Added

* Support MLflow tracking (`border-mlflow-tracking`) (https://github.com/taku-y/border/issues/2).
* Add candle agent (`border-candle-agent`) (https://github.com/taku-y/border/issues/1).
* Add `Trainer::train_offline()` method for offline training (`border-core`) (https://github.com/taku-y/border/issues/18).
* Add crate `border-policy-no-backend`.

### Changed

* Take `self` in the signature of `push()` method of replay buffer (`border-core`).
* Fix a bug in `MlpConfig` (`border-tch-agent`).
* Bump the version of tch to 0.16.0 (`border-tch-agent`).
* Change the name of trait `StepProcessorBase` to `StepProcessor` (`border-core`).
* Change the environment API to include terminate/truncate flags (`border-core`) (https://github.com/taku-y/border/issues/10).
* Split policy trait into two traits, one for sampling (`Policy`) and the other for configuration (`Configurable`) (https://github.com/taku-y/border/issues/12).

## v0.0.6 (2023-09-19)

### Added

* Docker files (`border`).
* Singularity files (`border`)
* Script for GPUSOROBAN (#67)
* Singularity files (`border`).
* Script for GPUSOROBAN (#67).
* `Evaluator` trait in `border-core` (#70). It can be used to customize evaluation logic in `Trainer`.
* Example of asynchronous trainer for native Atari environment and DQN (`border/examples`).
* Move tensorboard recorder into a separate crate (`border-tensorboard`)
* Move tensorboard recorder into a separate crate (`border-tensorboard`).

### Changed

* Bump the version of tch-rs to 0.8.0 (`border-tch-agent`).
* Rename agents to follow Rust naming conventions (`border-tch-agent`).
* Bump the version of gym to 0.26 (`border-py-gym-env`)
* Remove the type parameter for array shape of gym environments (`border-py-gym-env`)
* Interface of Python-Gym interface (`border-py-gym-env`)
* Bump the version of gym to 0.26 (`border-py-gym-env`).
* Remove the type parameter for array shape of gym environments (`border-py-gym-env`).
* Interface of Python-Gym interface (`border-py-gym-env`).
31 changes: 27 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,58 @@
members = [
"border-core",
"border-tensorboard",
"border-mlflow-tracking",
"border-py-gym-env",
"border-tch-agent",
"border-candle-agent",
"border-derive",
"border-atari-env",
"border-async-trainer",
"border-policy-no-backend",
"border",
]
exclude = ["docker/"]

[workspace.package]
version = "0.0.7"
edition = "2018"
rust-version = "1.76"
description = "Reinforcement learning library"
repository = "https://github.com/laboroai/border"
keywords = ["Reinforcement learning"]
categories = ["science"]
license = "MIT OR Apache-2.0"

[workspace.dependencies]
clap = "2.33.3"
clap = { version = "4.5.8", features = ["derive"] }
csv = "1.1.5"
fastrand = "1.4.0"
tch = "0.8.0"
tch = "0.16.0"
anyhow = "1.0.38"
crossbeam-channel = "0.5.1"
serde_yaml = "0.8.7"
aquamarine = "0.1"
log = "0.4"
dirs = "3.0.2"
thiserror = "1.0"
serde = "=1.0.126"
serde = "1.0.194"
serde_json = "^1.0.114"
numpy = "0.14.1"
env_logger = "0.8.2"
tempdir = "0.3.7"
num-traits = "0.2.14"
tensorboard-rs = "0.2.4"
pyo3 = { version = "=0.14.5", default-features=false }
pyo3 = { version = "=0.14.5", default-features = false }
ndarray = "0.15.1"
chrono = "0.4"
segment-tree = "2.0.0"
image = "0.23.14"
candle-core = { version = "=0.4.1", feature = ["cuda", "cudnn"] }
candle-nn = "0.4.1"
rand = { version = "0.8.5", features = ["small_rng"] }
itertools = "0.12.1"
ordered-float = "4.2.0"
reqwest = { version = "0.11.26", features = ["json", "blocking"] }
xxhash-rust = { version = "0.8.10", features = ["xxh3"] }
candle-optimisers = "0.4.0"
bincode = "1.3.3"
53 changes: 25 additions & 28 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,19 @@ A reinforcement learning library in Rust.

Border consists of the following crates:

* [border-core](https://crates.io/crates/border-core) provides basic traits and functions generic to environments and reinforcmenet learning (RL) agents.
* [border-tensorboard](https://crates.io/crates/border-tensorboard) has `TensorboardRecorder` struct to write records which can be shown in Tensorboard. It is based on [tensorboard-rs](https://crates.io/crates/tensorboard-rs).
* [border-py-gym-env](https://crates.io/crates/border-py-gym-env) is a wrapper of the [Gymnasium](https://gymnasium.farama.org) environments written in Python.
* [border-atari-env](https://crates.io/crates/border-atari-env) is a wrapper of [atari-env](https://crates.io/crates/atari-env), which is a part of [gym-rs](https://crates.io/crates/gym-rs).
* [border-tch-agent](https://crates.io/crates/border-tch-agent) is a collection of RL agents based on [tch](https://crates.io/crates/tch), including Deep Q network (DQN), implicit quantile network (IQN), and soft actor critic (SAC).
* [border-async-trainer](https://crates.io/crates/border-async-trainer) defines some traits and functions for asynchronous training of RL agents by multiple actors, which runs sampling processes in parallel. In each sampling process, an agent interacts with an environment to collect samples to be sent to a shared replay buffer.

You can use a part of these crates for your purposes, though [border-core](https://crates.io/crates/border-core) is mandatory. [This crate](https://crates.io/crates/border) is just a collection of examples. See [Documentation](https://docs.rs/border) for more details.

## News

The owner of this repository will be changed from [taku-y](https://github.com/taku-y) to [laboroai](https://github.com/laboroai).
* Core and utility
* [border-core](https://crates.io/crates/border-core) provides basic traits and functions generic to environments and reinforcement learning (RL) agents.
* [border-tensorboard](https://crates.io/crates/border-tensorboard) has `TensorboardRecorder` struct to write records which can be shown in Tensorboard. It is based on [tensorboard-rs](https://crates.io/crates/tensorboard-rs).
* [border-mlflow-tracking](https://crates.io/crates/border-mlflow-tracking) supports MLflow tracking to log metrics during training via the REST API.
* [border-async-trainer](https://crates.io/crates/border-async-trainer) defines some traits and functions for asynchronous training of RL agents by multiple actors, which run sampling processes in parallel. In each sampling process, an agent interacts with an environment to collect samples to be sent to a shared replay buffer.
* [border](https://crates.io/crates/border) is just a collection of examples.
* Environment
* [border-py-gym-env](https://crates.io/crates/border-py-gym-env) is a wrapper of the [Gymnasium](https://gymnasium.farama.org) environments written in Python.
* [border-atari-env](https://crates.io/crates/border-atari-env) is a wrapper of [atari-env](https://crates.io/crates/atari-env), which is a part of [gym-rs](https://crates.io/crates/gym-rs).
* Agent
* [border-tch-agent](https://crates.io/crates/border-tch-agent) includes RL agents based on [tch](https://crates.io/crates/tch), including Deep Q network (DQN), implicit quantile network (IQN), and soft actor critic (SAC).
* [border-candle-agent](https://crates.io/crates/border-candle-agent) includes RL agents based on [candle](https://crates.io/crates/candle-core).
* [border-policy-no-backend](https://crates.io/crates/border-policy-no-backend) includes a policy that is independent of any deep learning backend, such as Torch.

## Status

Expand All @@ -34,21 +35,17 @@ There are some example scripts in `border/examples` directory. These are tested

In the `docker` directory, there are scripts for running a Docker container, in which you can try the examples described above. Currently, `aarch64` is the main platform used for development.

## Tests

The following command has been tested in the Docker container running on M2 Macbook air.

```bash
cargo test --features=tch
```

## License

Crates | License
----------------------|------------------
`border-core` | MIT OR Apache-2.0
`border-py-gym-env` | MIT OR Apache-2.0
`border-atari-env` | GPL-2.0-or-later
`border-tch-agent` | MIT OR Apache-2.0
`border-async-trainer`| MIT OR Apache-2.0
`border` | GPL-2.0-or-later
Crates | License
--------------------------|------------------
`border-core` | MIT OR Apache-2.0
`border-tensorboard` | MIT OR Apache-2.0
`border-mlflow-tracking` | MIT OR Apache-2.0
`border-async-trainer` | MIT OR Apache-2.0
`border-py-gym-env` | MIT OR Apache-2.0
`border-atari-env` | GPL-2.0-or-later
`border-tch-agent` | MIT OR Apache-2.0
`border-candle-agent` | MIT OR Apache-2.0
`border-policy-no-backend`| MIT OR Apache-2.0
`border` | GPL-2.0-or-later
22 changes: 9 additions & 13 deletions border-async-trainer/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
[package]
name = "border-async-trainer"
version = "0.0.6"
authors = ["Taku Yoshioka <yoshioka@laboro.ai>"]
edition = "2018"
rust-version = "1.68.2"

description = "Atari environment based on gym-rs"
repository = "https://github.com/taku-y/border"
keywords = ["rl"]
categories = ["science"]
license = "MIT OR Apache-2.0"
version.workspace = true
edition.workspace = true
description.workspace = true
repository.workspace = true
keywords.workspace = true
categories.workspace = true
license.workspace = true
readme = "README.md"
autoexamples = false

[dependencies]
anyhow = { workspace = true }
aquamarine = { workspace = true }
border-core = { version = "0.0.6", path = "../border-core" }
border-tensorboard = { version = "0.0.6", path = "../border-tensorboard" }
border-core = { version = "0.0.7", path = "../border-core" }
border-tensorboard = { version = "0.0.7", path = "../border-tensorboard" }
serde = { workspace = true, features = ["derive"] }
log = { workspace = true }
tokio = { version = "1.14.0", features = ["full"] }
Expand Down
2 changes: 1 addition & 1 deletion border-async-trainer/src/actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
mod base;
mod stat;
pub use base::Actor;
pub use stat::{ActorStat, actor_stats_fmt};
pub use stat::{actor_stats_fmt, ActorStat};
Loading

0 comments on commit f55d1fd

Please sign in to comment.