diff --git a/examples/01-tensor.rs b/examples/01-tensor.rs
new file mode 100644
index 000000000..73e0944bd
--- /dev/null
+++ b/examples/01-tensor.rs
@@ -0,0 +1,29 @@
+//! Intro to dfdx::tensor
+
+use rand::thread_rng;
+
+use dfdx::tensor::{tensor, HasArrayData, Tensor1D, Tensor2D, Tensor3D, TensorCreator};
+
+fn main() {
+    // easily create tensors using the `tensor` function
+    let _: Tensor1D<5> = tensor([1.0, 2.0, 3.0, 4.0, 5.0]);
+
+    // you can also use [TensorCreator::new]
+    let _: Tensor1D<5> = TensorCreator::new([1.0, 2.0, 3.0, 4.0, 5.0]);
+
+    // [TensorCreator] has other helpful methods such as all zeros and all ones
+    let _: Tensor2D<2, 3> = TensorCreator::zeros();
+    let _: Tensor2D<2, 3> = TensorCreator::ones();
+
+    // we can also create random tensors
+    let mut rng = thread_rng();
+    let a: Tensor3D<2, 3, 4> = TensorCreator::randn(&mut rng);
+
+    // use `.data()` to access the underlying array
+    let a_data: &[[[f32; 4]; 3]; 2] = a.data();
+    println!("a={:?}", a_data);
+
+    // you can clone() a tensor (or duplicate()):
+    let a_copy = a.clone();
+    assert_eq!(a_copy.data(), a.data());
+}
diff --git a/examples/02-ops.rs b/examples/02-ops.rs
new file mode 100644
index 000000000..0f0e4b6df
--- /dev/null
+++ b/examples/02-ops.rs
@@ -0,0 +1,32 @@
+//! Intro to dfdx::tensor_ops
+
+use rand::prelude::*;
+
+use dfdx::tensor::{HasArrayData, Tensor0D, Tensor2D, TensorCreator};
+use dfdx::tensor_ops::add;
+
+fn main() {
+    let mut rng = StdRng::seed_from_u64(0);
+
+    let a: Tensor2D<2, 3> = TensorCreator::randn(&mut rng);
+    dbg!(a.data());
+
+    let b: Tensor2D<2, 3> = TensorCreator::randn(&mut rng);
+    dbg!(b.data());
+
+    // we can do binary operations like add two tensors together
+    let c = add(a, &b);
+    dbg!(c.data());
+
+    // or unary operations like apply the `relu` function to each element
+    let d = c.relu();
+    dbg!(d.data());
+
+    // we can add/sub/mul/div scalar values to tensors
+    let e = d + 0.5;
+    dbg!(e.data());
+
+    // or reduce tensors to smaller sizes
+    let f: Tensor0D = e.mean();
+    dbg!(f.data());
+}
diff --git a/examples/03-nn.rs b/examples/03-nn.rs
new file mode 100644
index 000000000..286ab6f28
--- /dev/null
+++ b/examples/03-nn.rs
@@ -0,0 +1,29 @@
+//! Intro to dfdx::nn
+
+use rand::prelude::*;
+
+use dfdx::nn::{Linear, Module, ReLU, ResetParams};
+use dfdx::tensor::{Tensor1D, Tensor2D, TensorCreator};
+
+fn main() {
+    // nn exposes many different neural network types, like the Linear layer!
+    let mut m: Linear<4, 2> = Default::default();
+
+    // at first they are initialized to zeros, but you can randomize them too
+    let mut rng = StdRng::seed_from_u64(0);
+    m.reset_params(&mut rng);
+
+    // they act on tensors using the forward method
+    let x: Tensor1D<4> = TensorCreator::zeros();
+    let _: Tensor1D<2> = m.forward(x);
+
+    // most of them can also act on many different shapes of tensors
+    let x: Tensor2D<10, 4> = TensorCreator::zeros();
+    let _: Tensor2D<10, 2> = m.forward(x);
+
+    // you can also combine multiple modules with tuples
+    let mlp: (Linear<4, 2>, ReLU, Linear<2, 1>) = Default::default();
+
+    let x: Tensor1D<4> = TensorCreator::zeros();
+    let _: Tensor1D<1> = mlp.forward(x);
+}
diff --git a/examples/04-gradients.rs b/examples/04-gradients.rs
new file mode 100644
index 000000000..2737feffe
--- /dev/null
+++ b/examples/04-gradients.rs
@@ -0,0 +1,35 @@
+//! Intro to dfdx::gradients and tapes
+
+use rand::prelude::*;
+
+use dfdx::gradients::{Gradients, NoneTape, OwnedTape};
+use dfdx::tensor::{Tensor0D, Tensor2D, TensorCreator};
+use dfdx::tensor_ops::matmul;
+
+fn main() {
+    let mut rng = StdRng::seed_from_u64(0);
+
+    // tensors are first created with no tapes on them - the NoneTape!
+    let weight: Tensor2D<4, 2, NoneTape> = TensorCreator::randn(&mut rng);
+    let a: Tensor2D<3, 4, NoneTape> = TensorCreator::randn(&mut rng);
+
+    // the first step to tracing is to call .trace()
+    // this sticks a gradient tape into the input tensor!
+    let b: Tensor2D<3, 4, OwnedTape> = a.trace();
+
+    // the tape will automatically move around as you perform ops
+    let c: Tensor2D<3, 2, OwnedTape> = matmul(b, &weight);
+    let d: Tensor2D<3, 2, OwnedTape> = c.sin();
+    let e: Tensor0D<OwnedTape> = d.mean();
+
+    // finally you can use .backward() to extract the gradients!
+    let gradients: Gradients = e.backward();
+
+    // now you can extract gradients for specific tensors
+    // by querying with them
+    let weight_grad: &[[f32; 2]; 4] = gradients.ref_gradient(&weight);
+    dbg!(weight_grad);
+
+    let a_grad: &[[f32; 4]; 3] = gradients.ref_gradient(&a);
+    dbg!(a_grad);
+}
diff --git a/examples/05-optim.rs b/examples/05-optim.rs
new file mode 100644
index 000000000..625a578d5
--- /dev/null
+++ b/examples/05-optim.rs
@@ -0,0 +1,60 @@
+//! Intro to dfdx::optim
+
+use rand::prelude::*;
+
+use dfdx::gradients::{Gradients, OwnedTape};
+use dfdx::losses::mse_loss;
+use dfdx::nn::{Linear, Module, ReLU, ResetParams, Tanh};
+use dfdx::optim::{Momentum, Optimizer, Sgd, SgdConfig};
+use dfdx::tensor::{HasArrayData, Tensor2D, TensorCreator};
+
+// first let's declare our neural network to optimize
+type Mlp = (
+    (Linear<5, 32>, ReLU),
+    (Linear<32, 32>, ReLU),
+    (Linear<32, 2>, Tanh),
+);
+
+fn main() {
+    let mut rng = StdRng::seed_from_u64(0);
+
+    // The first step to optimizing is to initialize the optimizer.
+    // Here we construct a stochastic gradient descent optimizer
+    // for our Mlp.
+    let mut sgd: Sgd = Sgd::new(SgdConfig {
+        lr: 1e-1,
+        momentum: Some(Momentum::Nesterov(0.9)),
+    });
+
+    // let's initialize our model and some dummy data
+    let mut mlp: Mlp = Default::default();
+    mlp.reset_params(&mut rng);
+    let x: Tensor2D<3, 5> = TensorCreator::randn(&mut rng);
+    let y: Tensor2D<3, 2> = TensorCreator::randn(&mut rng);
+
+    // first we pass our gradient tracing input through the network
+    let prediction: Tensor2D<3, 2, OwnedTape> = mlp.forward(x.trace());
+
+    // next compute the loss against the target dummy data
+    let loss = mse_loss(prediction, &y);
+    dbg!(loss.data());
+
+    // extract the gradients
+    let gradients: Gradients = loss.backward();
+
+    // the final step is to use our optimizer to update our model
+    // given the gradients we've calculated.
+    // This will modify our model!
+    sgd.update(&mut mlp, gradients)
+        .expect("Oops, there were some unused params");
+
+    // let's do this a couple times to make sure the loss decreases!
+    for i in 0..5 {
+        let prediction = mlp.forward(x.trace());
+        let loss = mse_loss(prediction, &y);
+        println!("Loss after update {i}: {:?}", loss.data());
+        let gradients: Gradients = loss.backward();
+        sgd.update(&mut mlp, gradients)
+            .expect("Oops, there were some unused params");
+    }
+}
diff --git a/examples/mnist_classifier.rs b/examples/06-mnist.rs
similarity index 89%
rename from examples/mnist_classifier.rs
rename to examples/06-mnist.rs
index f26f94158..c5a3527d4 100644
--- a/examples/mnist_classifier.rs
+++ b/examples/06-mnist.rs
@@ -1,3 +1,7 @@
+//! This example ties all the previous ones together
+//! to build a neural network that learns to recognize
+//! the MNIST digits.
+
 use dfdx::prelude::*;
 use indicatif::ProgressBar;
 use mnist::*;
@@ -39,6 +43,7 @@ impl MnistDataset {
     }
 }
 
+// our network structure
 type Mlp = (
     (Linear<784, 512>, ReLU),
     (Linear<512, 128>, ReLU),
@@ -46,9 +51,13 @@ type Mlp = (
     Linear<32, 10>,
 );
 
+// training batch size
 const BATCH_SIZE: usize = 32;
 
 fn main() {
+    // ftz substantially improves performance
+    dfdx::flush_denormals_to_zero();
+
     let mnist_path = std::env::args()
         .nth(1)
         .unwrap_or_else(|| "./datasets/MNIST/raw".to_string());
@@ -58,10 +67,12 @@
 
     let mut rng = StdRng::seed_from_u64(0);
 
+    // initialize model and optimizer
    let mut model: Mlp = Default::default();
    model.reset_params(&mut rng);
    let mut opt: Adam = Default::default();
 
+    // initialize dataset
    let dataset = MnistDataset::train(&mnist_path);
    println!("Found {:?} training images", dataset.len());
 
@@ -94,6 +105,7 @@
         );
     }
 
+    // save our model to a .npz file
     model
         .save("mnist-classifier.npz")
         .expect("failed to save model");
diff --git a/examples/custom.rs b/examples/07-custom-module.rs
similarity index 59%
rename from examples/custom.rs
rename to examples/07-custom-module.rs
index df8a6eb8d..aafead888 100644
--- a/examples/custom.rs
+++ b/examples/07-custom-module.rs
@@ -1,5 +1,10 @@
-use dfdx::prelude::*;
-use rand::prelude::{SeedableRng, StdRng};
+//! Demonstrates how to build a custom [nn::Module] without using tuples
+
+use rand::prelude::*;
+
+use dfdx::gradients::{CanUpdateWithGradients, GradientProvider, OwnedTape, Tape, UnusedTensors};
+use dfdx::nn::{Linear, Module, ReLU, ResetParams};
+use dfdx::tensor::{Tensor1D, Tensor2D, TensorCreator};
 
 /// Custom model struct
 /// This case is trivial and should be done with a tuple of linears and relus,
@@ -11,6 +16,7 @@ struct Mlp {
     relu: ReLU,
 }
 
+// ResetParams lets you randomize a model's parameters
 impl<const IN: usize, const INNER: usize, const OUT: usize> ResetParams for Mlp<IN, INNER, OUT> {
     fn reset_params<R: Rng>(&mut self, rng: &mut R) {
         self.l1.reset_params(rng);
@@ -19,6 +25,7 @@ impl ResetParams for Mlp<
     }
 }
 
+// CanUpdateWithGradients lets you update a model's parameters using gradients
 impl<const IN: usize, const INNER: usize, const OUT: usize> CanUpdateWithGradients
     for Mlp<IN, INNER, OUT>
 {
@@ -29,25 +36,29 @@ impl CanUpdateWithGradien
     }
 }
 
-// Impl module for single forward pass
+// impl Module for single item
 impl Module>
     for Mlp
 {
     type Output = Tensor1D;
 
-    fn forward(&self, input: Tensor1D) -> Self::Output {
-        self.l2.forward(self.relu.forward(self.l1.forward(input)))
+    fn forward(&self, x: Tensor1D) -> Self::Output {
+        let x = self.l1.forward(x);
+        let x = self.relu.forward(x);
+        self.l2.forward(x)
     }
 }
 
-// Impl module for batch forward pass
-impl
-    Module> for Mlp
+// impl Module for batch of items
+impl
+    Module> for Mlp
 {
-    type Output = Tensor2D;
+    type Output = Tensor2D;
 
-    fn forward(&self, input: Tensor2D) -> Self::Output {
-        self.l2.forward(self.relu.forward(self.l1.forward(input)))
+    fn forward(&self, x: Tensor2D) -> Self::Output {
+        let x = self.l1.forward(x);
+        let x = self.relu.forward(x);
+        self.l2.forward(x)
     }
 }
 
@@ -63,9 +74,9 @@ fn main() {
 
     // Forward pass with a single sample
     let sample: Tensor1D<10> = Tensor1D::randn(&mut rng);
-    let _y = model.forward(sample);
+    let _: Tensor1D<10> = model.forward(sample);
 
     // Forward pass with a batch of samples
     let batch: Tensor2D = Tensor2D::randn(&mut rng);
-    let _y = model.forward(batch);
+    let _: Tensor2D = model.forward(batch.trace());
 }
diff --git a/examples/08-tensor-broadcast-reduce.rs b/examples/08-tensor-broadcast-reduce.rs
new file mode 100644
index 000000000..02f3505fe
--- /dev/null
+++ b/examples/08-tensor-broadcast-reduce.rs
@@ -0,0 +1,38 @@
+//! Demonstrates broadcasting tensors to different sizes, and axis reductions
+//! with BroadcastTo and ReduceTo
+
+use dfdx::arrays::Axis;
+use dfdx::tensor::{tensor, HasArrayData, Tensor1D, Tensor2D, Tensor4D};
+use dfdx::tensor_ops::BroadcastTo;
+
+fn main() {
+    let a: Tensor1D<3> = tensor([1.0, 2.0, 3.0]);
+
+    // to broadcast, use `BroadcastTo::broadcast()` and specify
+    // the output type. the axes that are broadcast are inferred for you!
+    let b: Tensor2D<5, 3> = a.broadcast();
+    assert_eq!(b.data(), &[[1.0, 2.0, 3.0]; 5]);
+
+    // we can really broadcast any axes on either side
+    // here a (5,3) tensor is broadcast to (7,5,3,2).
+    // so 7 is added in front, and 2 is added last
+    let c: Tensor4D<7, 5, 3, 2> = b.broadcast();
+    assert_eq!(c.data(), &[[[[1.0; 2], [2.0; 2], [3.0; 2]]; 5]; 7]);
+
+    // the opposite of broadcast is reducing
+    // we've already introduced one reduction which is mean
+    let d: Tensor2D<5, 3> = c.mean();
+    assert_eq!(d.data(), &[[1.0, 2.0, 3.0]; 5]);
+
+    // generally you can just specify the output type
+    // and the reduction & broadcast will work.
+    // sometimes it's ambiguous though
+    let e: Tensor1D<1> = tensor([1.0]);
+
+    // here rust doesn't know if the new axis is the first or second
+    // so we have to explicitly tell it
+    let f: Tensor2D<1, 1> = BroadcastTo::<_, Axis<1>>::broadcast(e);
+
+    // reductions have the same problem when it's ambiguous
+    let _: Tensor1D<1> = f.mean::<_, Axis<0>>();
+}
diff --git a/examples/09-tensor-permute.rs b/examples/09-tensor-permute.rs
new file mode 100644
index 000000000..07a5c646d
--- /dev/null
+++ b/examples/09-tensor-permute.rs
@@ -0,0 +1,23 @@
+//! Demonstrates how to re-order (permute/transpose) the axes of a tensor
+
+use dfdx::arrays::Axes3;
+use dfdx::tensor::{Tensor3D, TensorCreator};
+use dfdx::tensor_ops::PermuteTo;
+
+fn main() {
+    let a: Tensor3D<3, 5, 7> = TensorCreator::zeros();
+
+    // permuting is as easy as just expressing the desired type
+    let b: Tensor3D<7, 5, 3> = a.permute();
+
+    // we can do any of the expected combinations!
+    let _: Tensor3D<5, 7, 3> = b.permute();
+
+    // just like broadcast/reduce there are times when
+    // inference is impossible because of ambiguities
+    let c: Tensor3D<1, 1, 1> = TensorCreator::zeros();
+
+    // when axes have the same sizes you'll have to indicate
+    // the axes explicitly to get around this
+    let _: Tensor3D<1, 1, 1> = PermuteTo::<_, Axes3<1, 0, 2>>::permute(c);
+}
diff --git a/examples/10-tensor-index.rs b/examples/10-tensor-index.rs
new file mode 100644
index 000000000..527e7ea97
--- /dev/null
+++ b/examples/10-tensor-index.rs
@@ -0,0 +1,29 @@
+//! Demonstrates how to select sub tensors (index) from tensors
+
+use dfdx::tensor::{tensor, HasArrayData, Tensor2D, Tensor3D};
+use dfdx::tensor_ops::Select1;
+
+fn main() {
+    let a: Tensor3D<3, 2, 3> = tensor([
+        [[0.00, 0.01, 0.02], [0.10, 0.11, 0.12]],
+        [[1.00, 1.01, 1.02], [1.10, 1.11, 1.12]],
+        [[2.00, 2.01, 2.02], [2.10, 2.11, 2.12]],
+    ]);
+
+    // the easiest thing to do is to select a single element from axis 0
+    let b: Tensor2D<2, 3> = a.clone().select(&0);
+    assert_eq!(b.data(), &a.data()[0]);
+
+    // but we can also select multiple elements from axis 0!
+    let _: Tensor3D<6, 2, 3> = a.clone().select(&[0, 0, 1, 1, 2, 2]);
+
+    // a 1d array of indices in this case can also mean
+    // select from the second axis. this is determined by two things:
+    // 1. we have 3 usize's in our indices, and 3 is the size of the first dimension
+    // 2. the output type has lost the middle axis, which means the usizes are reducing that axis
+    let _: Tensor2D<3, 3> = a.clone().select(&[0, 1, 0]);
+
+    // of course we can also select multiple values from the first axis.
+    // in this case we just specify multiple indices instead of a single one
+    let _: Tensor3D<3, 4, 3> = a.select(&[[0, 0, 0, 0], [0, 1, 0, 1], [1, 0, 1, 0]]);
+}
diff --git a/examples/conv_net.rs b/examples/11-conv-net.rs
similarity index 89%
rename from examples/conv_net.rs
rename to examples/11-conv-net.rs
index 19f2eedd3..650df799b 100644
--- a/examples/conv_net.rs
+++ b/examples/11-conv-net.rs
@@ -1,3 +1,6 @@
+//! Demonstrates how to build a neural network with convolution
+//! layers on nightly rust.
+
 #![cfg_attr(feature = "nightly", feature(generic_const_exprs))]
 
 #[cfg(not(feature = "nightly"))]
diff --git a/examples/12-multi-headed.rs b/examples/12-multi-headed.rs
new file mode 100644
index 000000000..a799c45df
--- /dev/null
+++ b/examples/12-multi-headed.rs
@@ -0,0 +1,16 @@
+//! Demonstrates how to build a neural network that has multiple
+//! outputs using `SplitInto`.
+
+use dfdx::nn::{Linear, Module, SplitInto};
+use dfdx::tensor::{tensor, Tensor1D};
+
+fn main() {
+    // SplitInto accepts a tuple of modules. Each one of the items in the
+    // tuple must accept the same type of input.
+    // Note that here, both of the linears have the same size input (1)
+    let m: SplitInto<(Linear<1, 3>, Linear<1, 5>)> = Default::default();
+
+    // when we forward data through, we get a tuple back!
+    let x = tensor([1.0]);
+    let _: (Tensor1D<3>, Tensor1D<5>) = m.forward(x);
+}
diff --git a/examples/classification.rs b/examples/classification.rs
deleted file mode 100644
index 5a6d42c6d..000000000
--- a/examples/classification.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-use dfdx::prelude::*;
-use rand::{rngs::StdRng, SeedableRng};
-use std::time::Instant;
-
-type Mlp = (
-    (Linear<10, 32>, ReLU),
-    (Linear<32, 32>, ReLU),
-    Linear<32, 2>,
-);
-
-fn main() {
-    let mut rng = StdRng::seed_from_u64(0);
-
-    // initialize target data
-    let x: Tensor2D<64, 10> = Tensor2D::randn(&mut rng);
-    let y: Tensor2D<64, 2> = Tensor2D::randn(&mut rng).softmax::<Axis<1>>();
-
-    // initialize model - all weights are 0s
-    let mut mlp: Mlp = Default::default();
-
-    // randomize model weights
-    mlp.reset_params(&mut rng);
-
-    // initialize our optimizer
-    let mut sgd = Sgd::new(SgdConfig {
-        lr: 1e-1,
-        momentum: Some(Momentum::Nesterov(0.9)),
-    });
-
-    // run through training data
-    for _i_epoch in 0..15 {
-        let start = Instant::now();
-
-        // forward through model, computing gradients
-        let pred = mlp.forward(x.trace());
-
-        // compute loss
-        let loss = cross_entropy_with_logits_loss(pred, &y);
-        let loss_v /*: f32 */ = *loss.data();
-
-        // run backprop
-        let gradients = loss.backward();
-
-        // update weights with optimizer
-        sgd.update(&mut mlp, gradients).expect("Unused params");
-
-        println!("cross entropy={:#.3} in {:?}", loss_v, start.elapsed());
-    }
-
-    mlp.save("classification.npz")
-        .expect("failed to save model");
-}
diff --git a/examples/multi_head.rs b/examples/multi_head.rs
deleted file mode 100644
index 0cb90daa2..000000000
--- a/examples/multi_head.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-use dfdx::prelude::*;
-use rand::{rngs::StdRng, SeedableRng};
-use std::time::Instant;
-
-type MultiHeadedMLP = (
-    (Linear<10, 32>, ReLU),
-    (Linear<32, 32>, ReLU),
-    SplitInto<((Linear<32, 2>, Tanh), (Linear<32, 1>, Tanh))>,
-);
-
-fn main() {
-    let mut rng = StdRng::seed_from_u64(0);
-
-    // initialize target data
-    let x: Tensor2D<64, 10> = Tensor2D::randn(&mut rng);
-    let y1: Tensor2D<64, 2> = Tensor2D::randn(&mut rng);
-    let y2: Tensor2D<64, 1> = Tensor2D::randn(&mut rng);
-
-    // initialize optimizer & model
-    let mut mlp: MultiHeadedMLP = Default::default();
-    mlp.reset_params(&mut rng);
-    let mut sgd: Sgd = Default::default();
-
-    // run through training data
-    for _i_epoch in 0..15 {
-        let start = Instant::now();
-
-        let x = x.trace();
-        let (pred1, pred2) = mlp.forward(x);
-
-        // NOTE: we also have to move the tape around when computing losses
-        let (loss2, tape) = mse_loss(pred2, &y2).split_tape();
-        let loss1 = mse_loss(pred1.put_tape(tape), &y1);
-
-        let losses = [*loss1.data(), *loss2.data()];
-        let loss = loss1 + &loss2;
-        let gradients = loss.backward();
-        sgd.update(&mut mlp, gradients).expect("Unused params");
-
-        println!("losses={:.3?} in {:?}", losses, start.elapsed());
-    }
-}
diff --git a/examples/npy_serialize.rs b/examples/numpy-save-load.rs
similarity index 91%
rename from examples/npy_serialize.rs
rename to examples/numpy-save-load.rs
index 409cfbfb9..3db82f6cd 100644
--- a/examples/npy_serialize.rs
+++ b/examples/numpy-save-load.rs
@@ -1,3 +1,5 @@
+//! Demonstrates how to use dfdx::numpy to save and load arrays
+
 use dfdx::numpy as np;
 
 fn main() {
diff --git a/examples/regression.rs b/examples/regression.rs
deleted file mode 100644
index b9394e09a..000000000
--- a/examples/regression.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use dfdx::prelude::*;
-use rand::{rngs::StdRng, SeedableRng};
-use std::time::Instant;
-
-// our simple 2 layer feedforward network with ReLU activations
-type Mlp = (
-    (Linear<10, 32>, ReLU),
-    (Linear<32, 32>, ReLU),
-    (Linear<32, 2>, Tanh),
-);
-
-fn main() {
-    let mut rng = StdRng::seed_from_u64(0);
-
-    // initialize target data
-    let x: Tensor2D<64, 10> = Tensor2D::randn(&mut rng);
-    let y: Tensor2D<64, 2> = Tensor2D::randn(&mut rng);
-
-    // initiliaze model - all weights are 0s
-    let mut mlp: Mlp = Default::default();
-
-    // randomize model weights
-    mlp.reset_params(&mut rng);
-
-    let mut sgd = Sgd::new(SgdConfig {
-        lr: 1e-1,
-        momentum: Some(Momentum::Nesterov(0.9)),
-    });
-
-    // run through training data
-    for _i_epoch in 0..15 {
-        let start = Instant::now();
-
-        // forward through model, computing gradients
-        let pred = mlp.forward(x.trace());
-
-        // compute loss
-        let loss = mse_loss(pred, &y);
-        let loss_v /*: f32 */ = *loss.data();
-
-        // run backprop
-        let gradients = loss.backward();
-
-        // update weights with optimizer
-        sgd.update(&mut mlp, gradients).expect("Unused params");
-
-        println!("mse={:#.3} in {:?}", loss_v, start.elapsed());
-    }
-
-    mlp.save("regression.npz").expect("failed to save mlp");
-}
diff --git a/examples/dqn.rs b/examples/rl-dqn.rs
similarity index 100%
rename from examples/dqn.rs
rename to examples/rl-dqn.rs
diff --git a/examples/ppo.rs b/examples/rl-ppo.rs
similarity index 100%
rename from examples/ppo.rs
rename to examples/rl-ppo.rs
diff --git a/examples/tensors.rs b/examples/tensors.rs
deleted file mode 100644
index a5eb62297..000000000
--- a/examples/tensors.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-#![allow(clippy::needless_range_loop)]
-use dfdx::prelude::*;
-
-fn main() {
-    let a: Tensor2D<2, 3> = TensorCreator::zeros();
-
-    // since add() expects tensors with the same size, we dont need a type for this
-    let b = TensorCreator::ones();
-    let c = add(a, &b);
-
-    // tensors just store raw rust arrays, use `.data()` to access this.
-    assert_eq!(c.data(), &[[1.0; 3]; 2]);
-
-    // since we pass in an array, rust will figure out that we mean Tensor1D<5> since its an [f32; 5]
-    let mut d = Tensor1D::new([1.0, 2.0, 3.0, 4.0, 5.0]);
-
-    // use `.mut_data()` to access underlying mutable array. type is provided for readability
-    let raw_data: &mut [f32; 5] = d.mut_data();
-    for i in 0..5 {
-        raw_data[i] *= 2.0;
-    }
-    assert_eq!(d.data(), &[2.0, 4.0, 6.0, 8.0, 10.0]);
-}