Merge pull request #19 from c0dearm/tensor-refactor

fix!: refactored shape of tensors and traits for easier expression of…

c0dearm committed Jul 9, 2022
2 parents 0daf964 + 72fa240 commit 2c1a081
Showing 19 changed files with 1,020 additions and 983 deletions.
9 changes: 4 additions & 5 deletions README.md
@@ -18,7 +18,7 @@ Internally it uses the [arrayfire](https://crates.io/crates/arrayfire) crate to

One clear benefit of this crate versus `PyTorch` is `Rust`'s strong type system. All operations performed on tensors during the graph build are checked at compile time for mathematical soundness, which means no runtime errors after an hour of model training. **If it compiles, it works**. If at some point you make a mistake while building your made-in-hell nested computational graph, for example in the shape of a tensor, you'll be stopped even before you can start feeling stupid.
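For instance, a quick sketch of what that compile-time check buys you (the shapes here are arbitrary, and the free-function `mu::add` / `mu::randn` calls follow the API shown elsewhere in this diff):

```rust
use mushin as mu;

fn main() {
    let a = mu::randn::<1, 1, 3, 2>();
    let b = mu::randn::<1, 1, 3, 2>();
    let _c = mu::randn::<1, 1, 2, 3>();

    // Same shape, so this compiles and becomes a node in the graph.
    let _ok = mu::add(&a, &b);

    // Different shape: the following line would be rejected at compile time.
    // let _nope = mu::add(&a, &_c);
}
```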

Moreover, because constant and variable tensors are actually different types, the developer always has an overview of which resulting tensors contribute to the gradients and which do not. What's more, the compiler will stop you from trying to compute the gradient of or with respect to a constant!
Moreover, because constant and variable tensors are actually different types, the developer always has an overview of which resulting tensors contribute to the gradients and which do not. On top of that, the compiler will stop you from trying to compute the gradient of or with respect to a constant!
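A minimal sketch of that distinction, using only constructors and methods that appear elsewhere in this diff (`fill`, `randn`, `freeze`, `add`, `backward`, `grad`):

```rust
use mushin as mu;

fn main() {
    // `freeze()` turns a variable tensor into a constant one: it still takes
    // part in the computation, but no gradient is tracked for it.
    let c = mu::fill::<1, 1, 3, 3>(1.0).freeze();
    let v = mu::randn::<1, 1, 3, 3>();

    let y = mu::add(&v, &c);
    y.backward();

    let _dv = v.grad(); // fine: `v` is a variable
    // let _dc = c.grad(); // rejected at compile time: constants carry no gradient
}
```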

Another benefit when compared to other similar libraries is that the computation graph is eagerly evaluated, which means the graph is **truly dynamic**. In other words, your next operations can be conditioned on the results of previous ones, so you can have conditional branching while building your graph.
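A minimal sketch of such branching, again using only the constructors and ops shown in this diff (the branch condition is just an arbitrary runtime check):

```rust
use mushin as mu;

fn main() {
    let w = mu::randn::<1, 1, 3, 2>();
    let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();

    // The graph is evaluated eagerly, so the intermediate result is already
    // available here and can steer which operation comes next.
    let z = mu::mm(&w, &x);
    let out = if std::env::args().count() > 1 {
        mu::sin(&z)
    } else {
        mu::cos(&z)
    };

    out.backward();
    let _dw = w.grad();
}
```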
@@ -37,14 +37,13 @@ mushin = "0.5"
The following is quite a self-explanatory example of the basic usage of **Mushin** to build computation graphs and get the derivatives back:
```rust
use mushin as mu;
use mu::Tensor;

fn main() {
let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();
let w = mu::randn::<1, 1, 3, 2>();
let b = mu::fill::<1, 1, 3, 3>(0.0);

let z = w.mm(&x).add(&b);
let z = mu::add(&mu::mm(&w, &x), &b);
z.backward();

let dz_dw = w.grad()
@@ -61,8 +60,8 @@ use mu::nn::{layers::Linear, activations::relu, losses::mse, optimizers::SGD};
let x = mu::eye::<16, 1, 1, 3>(1.0).freeze();
let y = mu::eye::<16, 1, 1, 5>(3.0).freeze();

let linear = Linear::<16, 3, 5, _, _>::new();
let optim = SGD::new(&linear.parameters(), 0.01);
let linear = Linear::<16, 3, 5>::new();
let optim = SGD::new(&[linear.parameters()], 0.01);

for _ in 0..5 {
let z = relu(&linear.forward(&x));
85 changes: 85 additions & 0 deletions src/gen.rs
@@ -0,0 +1,85 @@
use crate::tensor::{variable::Variable, Tensor};

/// Creates a variable tensor filled with the given value
#[must_use]
#[inline]
pub fn fill<const B: u64, const C: u64, const H: u64, const W: u64>(
v: f32,
) -> Tensor<B, C, H, W, Variable> {
Variable::from(arrayfire::constant!(v; H,W,C,B)).into()
}

/// Creates a variable tensor with the main diagonal filled with the given value, 0 everywhere else
#[must_use]
#[inline]
pub fn eye<const B: u64, const C: u64, const H: u64, const W: u64>(
v: f32,
) -> Tensor<B, C, H, W, Variable> {
Variable::from(v * arrayfire::identity::<f32>(arrayfire::dim4!(H, W, C, B))).into()
}

/// Creates a variable tensor with random values taken from a uniform distribution between [0,1]
#[must_use]
#[inline]
pub fn randu<const B: u64, const C: u64, const H: u64, const W: u64>(
) -> Tensor<B, C, H, W, Variable> {
Variable::from(arrayfire::randu!(H, W, C, B)).into()
}

/// Creates a variable tensor with random values taken from a normal distribution centered at 0
#[must_use]
#[inline]
pub fn randn<const B: u64, const C: u64, const H: u64, const W: u64>(
) -> Tensor<B, C, H, W, Variable> {
Variable::from(arrayfire::randn!(H, W, C, B)).into()
}

/// Creates a variable tensor from the given array of values
#[must_use]
#[inline]
pub fn custom<const B: u64, const C: u64, const H: u64, const W: u64>(
values: &[f32],
) -> Tensor<B, C, H, W, Variable> {
Variable::from(arrayfire::Array::new(values, arrayfire::dim4!(H, W, C, B))).into()
}

#[cfg(test)]
mod tests {
use super::{custom, eye, fill, randn, randu};
use crate::tensor::traits::Tensed;
use crate::tests::equal_data;
use arrayfire::{all_true_all, constant, dim4, identity, le};

#[test]
fn test_fill() {
let x = fill::<1, 2, 3, 4>(2.0);
assert!(equal_data(x.data(), constant!(2.0; 3,4,2,1)));
}

#[test]
fn test_eye() {
let x = eye::<1, 2, 3, 4>(2.0);
assert!(equal_data(
x.data(),
identity::<f32>(dim4!(3, 4, 2, 1)) * 2.0f32
));
}

#[test]
fn test_randu() {
let x = randu::<1, 2, 3, 4>();
assert!(all_true_all(&le(&x.data(), &constant!(1.0; 3,4,2,1), false)).0)
}

#[test]
fn test_randn() {
let x = randn::<1, 2, 3, 4>();
assert!(all_true_all(&le(&x.data(), &constant!(5.0; 3,4,2,1), false)).0)
}

#[test]
fn test_custom() {
let x = custom::<1, 1, 1, 1>(&[1.0]);
assert!(equal_data(x.data(), constant!(1.0;1,1,1,1)));
}
}
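A short usage sketch of these constructors, mirroring the tests above (note that the const parameters read `<B, C, H, W>` while the backing arrayfire arrays are built as `(H, W, C, B)`; the `mu::fill` / `mu::custom` re-exports come from the `lib.rs` changes below):

```rust
use mushin as mu;

fn main() {
    // A 1-batch, 2-channel, 3x4 variable tensor filled with 2.0; internally
    // this is an arrayfire array of dimensions (3, 4, 2, 1), as in `test_fill`.
    let _a = mu::fill::<1, 2, 3, 4>(2.0);

    // `custom` takes the raw values for the (H, W, C, B)-shaped array,
    // so 1 * 1 * 2 * 2 = 4 values are expected here.
    let _b = mu::custom::<1, 1, 2, 2>(&[1.0, 2.0, 3.0, 4.0]);
}
```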
18 changes: 10 additions & 8 deletions src/graph/node.rs
@@ -1,7 +1,9 @@
use arrayfire::{constant, Array};
use std::cell::{Ref, RefCell, RefMut};
use std::rc::Rc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{
cell::{Ref, RefCell, RefMut},
rc::Rc,
sync::atomic::{AtomicUsize, Ordering},
};

static COUNTER: AtomicUsize = AtomicUsize::new(0);

@@ -250,16 +252,16 @@ impl BinaryOp {
#[cfg(test)]
pub(crate) mod tests {
use super::{Node, Origin};
use crate::tests::equal_arrays;
use crate::tests::equal_data;

#[test]
fn new_node() {
let node = Node::new(arrayfire::constant!(2.0; 1,2,3,4), Origin::Declaration);
assert!(equal_arrays(
assert!(equal_data(
node.data().clone(),
arrayfire::constant!(2.0; 1,2,3,4)
));
assert!(equal_arrays(
assert!(equal_data(
node.grad().clone(),
arrayfire::constant!(0.0; 1,2,3,4)
));
@@ -292,7 +294,7 @@ pub(crate) mod tests {
fn ones_grad() {
let node = Node::new(arrayfire::constant!(2.0; 1,2,3,4), Origin::Declaration);
node.ones_grad();
assert!(equal_arrays(
assert!(equal_data(
node.grad().clone(),
arrayfire::constant!(1.0; 1,2,3,4)
));
@@ -302,7 +304,7 @@ pub(crate) mod tests {
fn zero_grad() {
let node = Node::new(arrayfire::constant!(2.0; 1,2,3,4), Origin::Declaration);
node.zero_grad();
assert!(equal_arrays(
assert!(equal_data(
node.grad().clone(),
arrayfire::constant!(0.0; 1,2,3,4)
));
3 changes: 1 addition & 2 deletions src/graph/tape.rs
@@ -1,5 +1,4 @@
use crate::graph::node::Node;
pub use crate::graph::node::{BinaryReverseFn, NodeId, UnaryReverseFn};
use crate::graph::node::{Node, NodeId};
use std::collections::{btree_map::Values, BTreeMap};
use std::rc::Rc;

122 changes: 21 additions & 101 deletions src/lib.rs
@@ -5,25 +5,25 @@
//! [reverse automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation).
//! In other words, what `PyTorch` is to `Python` is what `Mushin` is to `Rust`.
//!
//! All the operations on tensors use the excellent [arrayfire](https://arrayfire.com/)
//! library as a backend. Which means **Mushin can perform computations on any device**
//! (Nvidia CUDA GPUs, `OpenCL`, Intel MKL... ). Plus, all operations are checked at
//! compile time for mathematical correctness. I.e. You won't be able to add two tensors
//! of different shape/dimensions. The shape of the resulting tensors for all your
//! operations is tracked through the computation graph so in that regard we can offer
//! a guarantee that `Tensorflow` or `PyTorch` can't: **If it compiles, your computation
//! graph is guaranteed to be correct**
//! This crate is backed by [arrayfire](https://arrayfire.com/) to perform the tensor
//! operations on any device, namely Nvidia CUDA GPUs, `OpenCL`, Intel MKL... On top of that,
//! all operations are checked at compile time for mathematical correctness,
//! i.e. you won't be able to add two tensors of different shapes/dimensions.
//! The shapes of the resulting tensors of all your operations are tracked through the computation
//! graph at compile time, so in that regard we can offer a guarantee that `Tensorflow`
//! or `PyTorch` can't: **If it compiles, your computation graph is guaranteed to be correct.**
//!
//! ## Usage
//! ```rust
//! #![feature(generic_const_exprs)]
//!
//! use mushin as mu;
//! use mu::Tensor;
//!
//! let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();
//! let w = mu::randn::<1, 1, 3, 2>();
//! let b = mu::fill::<1, 1, 3, 3>(0.0);
//!
//! let z = w.mm(&x).add(&b);
//! let z = mu::add(&mu::mm(&w, &x), &b);
//! z.backward();
//!
//! let dz_dw = w.grad();
@@ -37,11 +37,10 @@
//! on the result of previous operations.
//!
//! Mushin automatically keeps track of all the operations performed up until any given variable
//! and calling `backward()` in one of them traverses the computation graph in
//! [reverse mode](https://en.wikipedia.org/wiki/Automatic_differentiation) to accumulate the
//! gradients of all of its ancestor variables. By using the `grad()` method in any of them we can
//! now retrieve their gradients as new `Variable` tensor, which in turn can be used to compute
//! further gradients!
//! and calling `backward()` on one of them traverses the computation graph in reverse mode
//! to accumulate the gradients of all of its ancestor variables.
//! By using the `grad()` method on any of them we can then retrieve their gradients as new variable
//! tensors, which in turn can be used to compute further gradients!
//!
//! It is quite possible the reader is more interested in the Deep Learning utilities of this
//! library than in the raw auto-grad foundations.
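A sketch of that last point, assuming `grad()` indeed hands back a regular variable tensor that can be differentiated again (illustrative only; it relies on the ops re-exported by this commit):

```rust
use mushin as mu;

fn main() {
    let w = mu::randn::<1, 1, 3, 2>();
    let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();

    let z = mu::mm(&w, &x);
    z.backward();

    // The gradient comes back as an ordinary variable tensor...
    let dz_dw = w.grad();

    // ...so it can feed further operations, whose gradients can then be
    // computed in turn.
    let y = mu::mul(&dz_dw, &dz_dw);
    y.backward();
    let _d2 = dz_dw.grad();
}
```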
@@ -61,104 +60,25 @@
clippy::missing_inline_in_public_items
)]
#![allow(incomplete_features)]
#![feature(associated_const_equality)]
#![feature(generic_const_exprs)]
#![feature(associated_const_equality)]

#[cfg(feature = "nn")]
pub mod nn;

mod gen;
mod graph;
mod ops;
mod tensor;

use graph::{node::Node, tape::Tape};
use tensor::variable::Variable;

pub use tensor::Tensor;

/// Creates a `Variable` tensor filled with the given value
#[must_use]
#[inline]
pub fn fill<const B: u64, const C: u64, const H: u64, const W: u64>(
v: f32,
) -> Variable<B, C, H, W> {
let data = arrayfire::constant!(v; H,W,C,B);
Variable::new(Tape::default(), Node::declaration(data))
}

/// Creates a `Variable` tensor with the main diagonal filled with the given value, 0 everywhere else
#[must_use]
#[inline]
pub fn eye<const B: u64, const C: u64, const H: u64, const W: u64>(v: f32) -> Variable<B, C, H, W> {
let data = v * arrayfire::identity::<f32>(arrayfire::dim4!(H, W, C, B));
Variable::new(Tape::default(), Node::declaration(data))
}

/// Creates a `Variable` tensor with random values taken from a uniform distribution between [0,1]
#[must_use]
#[inline]
pub fn randu<const B: u64, const C: u64, const H: u64, const W: u64>() -> Variable<B, C, H, W> {
let data = arrayfire::randu!(H, W, C, B);
Variable::new(Tape::default(), Node::declaration(data))
}

/// Creates a `Variable` tensor with random values taken from a normal distribution centered at 0
#[must_use]
#[inline]
pub fn randn<const B: u64, const C: u64, const H: u64, const W: u64>() -> Variable<B, C, H, W> {
let data = arrayfire::randn!(H, W, C, B);
Variable::new(Tape::default(), Node::declaration(data))
}

/// Creates a `Variable` tensor from the given array of values
#[must_use]
#[inline]
pub fn custom<const B: u64, const C: u64, const H: u64, const W: u64>(
values: &[f32],
) -> Variable<B, C, H, W> {
let data = arrayfire::Array::new(values, arrayfire::dim4!(H, W, C, B));
Variable::new(Tape::default(), Node::declaration(data))
}
pub use gen::{custom, eye, fill, randn, randu};
pub use ops::{add, cos, div, mm, mul, reshape, sin, sub};

#[cfg(test)]
mod tests {
use crate as mu;
use arrayfire::{abs, all_true_all, constant, dim4, identity, le, Array};
use mu::Tensor;
use arrayfire::{abs, all_true_all, le, Array};

pub(crate) fn equal_arrays(x: Array<f32>, y: Array<f32>) -> bool {
pub(crate) fn equal_data(x: Array<f32>, y: Array<f32>) -> bool {
all_true_all(&le(&abs(&(x - y)), &1e-6, false)).0
}

#[test]
fn fill() {
let x = mu::fill::<1, 2, 3, 4>(2.0);
assert!(equal_arrays(x.data(), constant!(2.0; 3,4,2,1)));
}

#[test]
fn eye() {
let x = mu::eye::<1, 2, 3, 4>(2.0);
assert!(equal_arrays(
x.data(),
identity::<f32>(dim4!(3, 4, 2, 1)) * 2.0f32
));
}

#[test]
fn randu() {
let x = mu::randu::<1, 2, 3, 4>();
assert!(all_true_all(&le(&x.data(), &constant!(1.0; 3,4,2,1), false)).0)
}

#[test]
fn randn() {
let x = mu::randn::<1, 2, 3, 4>();
assert!(all_true_all(&le(&x.data(), &constant!(5.0; 3,4,2,1), false)).0)
}

#[test]
fn custom() {
let x = mu::custom::<1, 1, 1, 1>(&[1.0]);
assert!(equal_arrays(x.data(), constant!(1.0;1,1,1,1)));
}
}