Merge pull request #19 from c0dearm/tensor-refactor

fix!: refactored shape of tensors and traits for easier expression of…

c0dearm committed Jul 9, 2022
2 parents 0daf964 + 72fa240 commit 2c1a081
Showing 19 changed files with 1,020 additions and 983 deletions.
9 changes: 4 additions & 5 deletions README.md
@@ -18,7 +18,7 @@ Internally it uses the [arrayfire](https://crates.io/crates/arrayfire) crate to

One clear benefit of this crate versus `PyTorch` is `Rust`'s strong type system. All operations performed on tensors during the graph build are checked at compile time for mathematical soundness, which means no runtime errors after an hour of model training. **If it compiles, it works**. If at some point you make a mistake while building your made-in-hell nested computational graph, for example in the shape of a tensor, you'll be stopped even before you can start feeling stupid.
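For instance, a quick sketch of what that compile-time check buys you (the shapes here are arbitrary, and the free-function `mu::add` / `mu::randn` calls follow the API shown elsewhere in this diff):

```rust
use mushin as mu;

fn main() {
    let a = mu::randn::<1, 1, 3, 2>();
    let b = mu::randn::<1, 1, 3, 2>();
    let _c = mu::randn::<1, 1, 2, 3>();

    // Same shape, so this compiles and becomes a node in the graph.
    let _ok = mu::add(&a, &b);

    // Different shape: the following line would be rejected at compile time.
    // let _nope = mu::add(&a, &_c);
}
```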

Moreover, because constant and variable tensors are actually different types, the developer always has an overview of which resulting tensors contribute to the gradients and which do not. What's more, the compiler will stop you from trying to compute the gradient of or with respect to a constant!
Moreover, because constant and variable tensors are actually different types, the developer always has an overview of which resulting tensors contribute to the gradients and which do not. On top of that, the compiler will stop you from trying to compute the gradient of or with respect to a constant!
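A minimal sketch of that distinction, using only constructors and methods that appear elsewhere in this diff (`fill`, `randn`, `freeze`, `add`, `backward`, `grad`):

```rust
use mushin as mu;

fn main() {
    // `freeze()` turns a variable tensor into a constant one: it still takes
    // part in the computation, but no gradient is tracked for it.
    let c = mu::fill::<1, 1, 3, 3>(1.0).freeze();
    let v = mu::randn::<1, 1, 3, 3>();

    let y = mu::add(&v, &c);
    y.backward();

    let _dv = v.grad(); // fine: `v` is a variable
    // let _dc = c.grad(); // rejected at compile time: constants carry no gradient
}
```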

Another benefit when compared to other similar libraries is that the computation graph is eagerly evaluated, which means the graph is **truly dynamic**. In other words, your next operations can be conditioned on the results of previous ones, so you can have conditional branching while building your graph.
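A minimal sketch of such branching, again using only the constructors and ops shown in this diff (the branch condition is just an arbitrary runtime check):

```rust
use mushin as mu;

fn main() {
    let w = mu::randn::<1, 1, 3, 2>();
    let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();

    // The graph is evaluated eagerly, so the intermediate result is already
    // available here and can steer which operation comes next.
    let z = mu::mm(&w, &x);
    let out = if std::env::args().count() > 1 {
        mu::sin(&z)
    } else {
        mu::cos(&z)
    };

    out.backward();
    let _dw = w.grad();
}
```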
@@ -37,14 +37,13 @@ mushin = "0.5"
The following is quite a self-explanatory example of the basic usage of **Mushin** to build computation graphs and get the derivatives back:
```rust
use mushin as mu;
use mu::Tensor;

fn main() {
let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();
let w = mu::randn::<1, 1, 3, 2>();
let b = mu::fill::<1, 1, 3, 3>(0.0);

let z = w.mm(&x).add(&b);
let z = mu::add(&mu::mm(&w, &x), &b);
z.backward();

let dz_dw = w.grad()
@@ -61,8 +60,8 @@ use mu::nn::{layers::Linear, activations::relu, losses::mse, optimizers::SGD};
let x = mu::eye::<16, 1, 1, 3>(1.0).freeze();
let y = mu::eye::<16, 1, 1, 5>(3.0).freeze();

let linear = Linear::<16, 3, 5, _, _>::new();
let optim = SGD::new(&linear.parameters(), 0.01);
let linear = Linear::<16, 3, 5>::new();
let optim = SGD::new(&[linear.parameters()], 0.01);

for _ in 0..5 {
let z = relu(&linear.forward(&x));
85 changes: 85 additions & 0 deletions src/gen.rs
@@ -0,0 +1,85 @@
use crate::tensor::{variable::Variable, Tensor};

/// Creates a variable tensor filled with the given value
#[must_use]
#[inline]
pub fn fill<const B: u64, const C: u64, const H: u64, const W: u64>(
v: f32,
) -> Tensor<B, C, H, W, Variable> {
Variable::from(arrayfire::constant!(v; H,W,C,B)).into()
}

/// Creates a variable tensor with the main diagonal filled with the given value, 0 everywhere else
#[must_use]
#[inline]
pub fn eye<const B: u64, const C: u64, const H: u64, const W: u64>(
v: f32,
) -> Tensor<B, C, H, W, Variable> {
Variable::from(v * arrayfire::identity::<f32>(arrayfire::dim4!(H, W, C, B))).into()
}

/// Creates a variable tensor with random values taken from a uniform distribution between [0,1]
#[must_use]
#[inline]
pub fn randu<const B: u64, const C: u64, const H: u64, const W: u64>(
) -> Tensor<B, C, H, W, Variable> {
Variable::from(arrayfire::randu!(H, W, C, B)).into()
}

/// Creates a variable tensor with random values taken from a normal distribution centered at 0
#[must_use]
#[inline]
pub fn randn<const B: u64, const C: u64, const H: u64, const W: u64>(
) -> Tensor<B, C, H, W, Variable> {
Variable::from(arrayfire::randn!(H, W, C, B)).into()
}

/// Creates a variable tensor from the given array of values
#[must_use]
#[inline]
pub fn custom<const B: u64, const C: u64, const H: u64, const W: u64>(
values: &[f32],
) -> Tensor<B, C, H, W, Variable> {
Variable::from(arrayfire::Array::new(values, arrayfire::dim4!(H, W, C, B))).into()
}

#[cfg(test)]
mod tests {
use super::{custom, eye, fill, randn, randu};
use crate::tensor::traits::Tensed;
use crate::tests::equal_data;
use arrayfire::{all_true_all, constant, dim4, identity, le};

#[test]
fn test_fill() {
let x = fill::<1, 2, 3, 4>(2.0);
assert!(equal_data(x.data(), constant!(2.0; 3,4,2,1)));
}

#[test]
fn test_eye() {
let x = eye::<1, 2, 3, 4>(2.0);
assert!(equal_data(
x.data(),
identity::<f32>(dim4!(3, 4, 2, 1)) * 2.0f32
));
}

#[test]
fn test_randu() {
let x = randu::<1, 2, 3, 4>();
assert!(all_true_all(&le(&x.data(), &constant!(1.0; 3,4,2,1), false)).0)
}

#[test]
fn test_randn() {
let x = randn::<1, 2, 3, 4>();
assert!(all_true_all(&le(&x.data(), &constant!(5.0; 3,4,2,1), false)).0)
}

#[test]
fn test_custom() {
let x = custom::<1, 1, 1, 1>(&[1.0]);
assert!(equal_data(x.data(), constant!(1.0;1,1,1,1)));
}
}
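A short usage sketch of these constructors, mirroring the tests above (note that the const parameters read `<B, C, H, W>` while the backing arrayfire arrays are built as `(H, W, C, B)`; the `mu::fill` / `mu::custom` re-exports come from the `lib.rs` changes below):

```rust
use mushin as mu;

fn main() {
    // A 1-batch, 2-channel, 3x4 variable tensor filled with 2.0; internally
    // this is an arrayfire array of dimensions (3, 4, 2, 1), as in `test_fill`.
    let _a = mu::fill::<1, 2, 3, 4>(2.0);

    // `custom` takes the raw values for the (H, W, C, B)-shaped array,
    // so 1 * 1 * 2 * 2 = 4 values are expected here.
    let _b = mu::custom::<1, 1, 2, 2>(&[1.0, 2.0, 3.0, 4.0]);
}
```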
18 changes: 10 additions & 8 deletions src/graph/node.rs
@@ -1,7 +1,9 @@
use arrayfire::{constant, Array};
use std::cell::{Ref, RefCell, RefMut};
use std::rc::Rc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{
cell::{Ref, RefCell, RefMut},
rc::Rc,
sync::atomic::{AtomicUsize, Ordering},
};

static COUNTER: AtomicUsize = AtomicUsize::new(0);

@@ -250,16 +252,16 @@ impl BinaryOp {
#[cfg(test)]
pub(crate) mod tests {
use super::{Node, Origin};
use crate::tests::equal_arrays;
use crate::tests::equal_data;

#[test]
fn new_node() {
let node = Node::new(arrayfire::constant!(2.0; 1,2,3,4), Origin::Declaration);
assert!(equal_arrays(
assert!(equal_data(
node.data().clone(),
arrayfire::constant!(2.0; 1,2,3,4)
));
assert!(equal_arrays(
assert!(equal_data(
node.grad().clone(),
arrayfire::constant!(0.0; 1,2,3,4)
));
@@ -292,7 +294,7 @@ pub(crate) mod tests {
fn ones_grad() {
let node = Node::new(arrayfire::constant!(2.0; 1,2,3,4), Origin::Declaration);
node.ones_grad();
assert!(equal_arrays(
assert!(equal_data(
node.grad().clone(),
arrayfire::constant!(1.0; 1,2,3,4)
));
@@ -302,7 +304,7 @@ pub(crate) mod tests {
fn zero_grad() {
let node = Node::new(arrayfire::constant!(2.0; 1,2,3,4), Origin::Declaration);
node.zero_grad();
assert!(equal_arrays(
assert!(equal_data(
node.grad().clone(),
arrayfire::constant!(0.0; 1,2,3,4)
));
3 changes: 1 addition & 2 deletions src/graph/tape.rs
@@ -1,5 +1,4 @@
use crate::graph::node::Node;
pub use crate::graph::node::{BinaryReverseFn, NodeId, UnaryReverseFn};
use crate::graph::node::{Node, NodeId};
use std::collections::{btree_map::Values, BTreeMap};
use std::rc::Rc;

122 changes: 21 additions & 101 deletions src/lib.rs
@@ -5,25 +5,25 @@
//! [reverse automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation).
//! In other words, what `PyTorch` is to `Python` is what `Mushin` is to `Rust`.
//!
//! All the operations on tensors use the excellent [arrayfire](https://arrayfire.com/)
//! library as a backend. Which means **Mushin can perform computations on any device**
//! (Nvidia CUDA GPUs, `OpenCL`, Intel MKL... ). Plus, all operations are checked at
//! compile time for mathematical correctness. I.e. You won't be able to add two tensors
//! of different shape/dimensions. The shape of the resulting tensors for all your
//! operations is tracked through the computation graph so in that regard we can offer
//! a guarantee that `Tensorflow` or `PyTorch` can't: **If it compiles, your computation
//! graph is guaranteed to be correct**
//! This crate is backed by [arrayfire](https://arrayfire.com/) to perform the tensor
//! operations on any device, namely Nvidia CUDA GPUs, `OpenCL`, Intel MKL... On top of that,
//! all operations are checked at compile time for mathematical correctness,
//! i.e. you won't be able to add two tensors of different shapes/dimensions.
//! The shapes of the resulting tensors of all your operations are tracked through the computation
//! graph at compile time, so in that regard we can offer a guarantee that `Tensorflow`
//! or `PyTorch` can't: **If it compiles, your computation graph is guaranteed to be correct.**
//!
//! ## Usage
//! ```rust
//! #![feature(generic_const_exprs)]
//!
//! use mushin as mu;
//! use mu::Tensor;
//!
//! let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();
//! let w = mu::randn::<1, 1, 3, 2>();
//! let b = mu::fill::<1, 1, 3, 3>(0.0);
//!
//! let z = w.mm(&x).add(&b);
//! let z = mu::add(&mu::mm(&w, &x), &b);
//! z.backward();
//!
//! let dz_dw = w.grad();
@@ -37,11 +37,10 @@
//! on the result of previous operations.
//!
//! Mushin automatically keeps track of all the operations performed up until any given variable
//! and calling `backward()` in one of them traverses the computation graph in
//! [reverse mode](https://en.wikipedia.org/wiki/Automatic_differentiation) to accumulate the
//! gradients of all of its ancestor variables. By using the `grad()` method in any of them we can
//! now retrieve their gradients as new `Variable` tensor, which in turn can be used to compute
//! further gradients!
//! and calling `backward()` on one of them traverses the computation graph in reverse mode
//! to accumulate the gradients of all of its ancestor variables.
//! By using the `grad()` method on any of them we can then retrieve their gradients as new variable
//! tensors, which in turn can be used to compute further gradients!
//!
//! It is quite possible the reader is more interested in the Deep Learning utilities of this
//! library than in the raw auto-grad foundations.
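A sketch of that last point, assuming `grad()` indeed hands back a regular variable tensor that can be differentiated again (illustrative only; it relies on the ops re-exported by this commit):

```rust
use mushin as mu;

fn main() {
    let w = mu::randn::<1, 1, 3, 2>();
    let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();

    let z = mu::mm(&w, &x);
    z.backward();

    // The gradient comes back as an ordinary variable tensor...
    let dz_dw = w.grad();

    // ...so it can feed further operations, whose gradients can then be
    // computed in turn.
    let y = mu::mul(&dz_dw, &dz_dw);
    y.backward();
    let _d2 = dz_dw.grad();
}
```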
@@ -61,104 +60,25 @@
clippy::missing_inline_in_public_items
)]
#![allow(incomplete_features)]
#![feature(associated_const_equality)]
#![feature(generic_const_exprs)]
#![feature(associated_const_equality)]

#[cfg(feature = "nn")]
pub mod nn;

mod gen;
mod graph;
mod ops;
mod tensor;

use graph::{node::Node, tape::Tape};
use tensor::variable::Variable;

pub use tensor::Tensor;

/// Creates a `Variable` tensor filled with the given value
#[must_use]
#[inline]
pub fn fill<const B: u64, const C: u64, const H: u64, const W: u64>(
v: f32,
) -> Variable<B, C, H, W> {
let data = arrayfire::constant!(v; H,W,C,B);
Variable::new(Tape::default(), Node::declaration(data))
}

/// Creates a `Variable` tensor with the main diagonal filled with the given value, 0 everywhere else
#[must_use]
#[inline]
pub fn eye<const B: u64, const C: u64, const H: u64, const W: u64>(v: f32) -> Variable<B, C, H, W> {
let data = v * arrayfire::identity::<f32>(arrayfire::dim4!(H, W, C, B));
Variable::new(Tape::default(), Node::declaration(data))
}

/// Creates a `Variable` tensor with random values taken from a uniform distribution between [0,1]
#[must_use]
#[inline]
pub fn randu<const B: u64, const C: u64, const H: u64, const W: u64>() -> Variable<B, C, H, W> {
let data = arrayfire::randu!(H, W, C, B);
Variable::new(Tape::default(), Node::declaration(data))
}

/// Creates a `Variable` tensor with random values taken from a normal distribution centered at 0
#[must_use]
#[inline]
pub fn randn<const B: u64, const C: u64, const H: u64, const W: u64>() -> Variable<B, C, H, W> {
let data = arrayfire::randn!(H, W, C, B);
Variable::new(Tape::default(), Node::declaration(data))
}

/// Creates a `Variable` tensor from the given array of values
#[must_use]
#[inline]
pub fn custom<const B: u64, const C: u64, const H: u64, const W: u64>(
values: &[f32],
) -> Variable<B, C, H, W> {
let data = arrayfire::Array::new(values, arrayfire::dim4!(H, W, C, B));
Variable::new(Tape::default(), Node::declaration(data))
}
pub use gen::{custom, eye, fill, randn, randu};
pub use ops::{add, cos, div, mm, mul, reshape, sin, sub};

#[cfg(test)]
mod tests {
use crate as mu;
use arrayfire::{abs, all_true_all, constant, dim4, identity, le, Array};
use mu::Tensor;
use arrayfire::{abs, all_true_all, le, Array};

pub(crate) fn equal_arrays(x: Array<f32>, y: Array<f32>) -> bool {
pub(crate) fn equal_data(x: Array<f32>, y: Array<f32>) -> bool {
all_true_all(&le(&abs(&(x - y)), &1e-6, false)).0
}

#[test]
fn fill() {
let x = mu::fill::<1, 2, 3, 4>(2.0);
assert!(equal_arrays(x.data(), constant!(2.0; 3,4,2,1)));
}

#[test]
fn eye() {
let x = mu::eye::<1, 2, 3, 4>(2.0);
assert!(equal_arrays(
x.data(),
identity::<f32>(dim4!(3, 4, 2, 1)) * 2.0f32
));
}

#[test]
fn randu() {
let x = mu::randu::<1, 2, 3, 4>();
assert!(all_true_all(&le(&x.data(), &constant!(1.0; 3,4,2,1), false)).0)
}

#[test]
fn randn() {
let x = mu::randn::<1, 2, 3, 4>();
assert!(all_true_all(&le(&x.data(), &constant!(5.0; 3,4,2,1), false)).0)
}

#[test]
fn custom() {
let x = mu::custom::<1, 1, 1, 1>(&[1.0]);
assert!(equal_arrays(x.data(), constant!(1.0;1,1,1,1)));
}
}