diff --git a/examples/01-tensor.rs b/examples/01-tensor.rs index 73e0944bd..de783264f 100644 --- a/examples/01-tensor.rs +++ b/examples/01-tensor.rs @@ -2,7 +2,8 @@ use rand::thread_rng; -use dfdx::tensor::{tensor, HasArrayData, Tensor1D, Tensor2D, Tensor3D, TensorCreator}; +use dfdx::arrays::HasArrayData; +use dfdx::tensor::{tensor, Tensor1D, Tensor2D, Tensor3D, TensorCreator}; fn main() { // easily create tensors using the `tensor` function diff --git a/examples/02-ops.rs b/examples/02-ops.rs index 0f0e4b6df..492eb9eac 100644 --- a/examples/02-ops.rs +++ b/examples/02-ops.rs @@ -2,7 +2,8 @@ use rand::prelude::*; -use dfdx::tensor::{HasArrayData, Tensor0D, Tensor2D, TensorCreator}; +use dfdx::arrays::HasArrayData; +use dfdx::tensor::{Tensor0D, Tensor2D, TensorCreator}; use dfdx::tensor_ops::add; fn main() { diff --git a/examples/05-optim.rs b/examples/05-optim.rs index 625a578d5..94e5c3109 100644 --- a/examples/05-optim.rs +++ b/examples/05-optim.rs @@ -2,11 +2,12 @@ use rand::prelude::*; +use dfdx::arrays::HasArrayData; use dfdx::gradients::{Gradients, OwnedTape}; use dfdx::losses::mse_loss; use dfdx::nn::{Linear, Module, ReLU, ResetParams, Tanh}; use dfdx::optim::{Momentum, Optimizer, Sgd, SgdConfig}; -use dfdx::tensor::{HasArrayData, Tensor2D, TensorCreator}; +use dfdx::tensor::{Tensor2D, TensorCreator}; // first let's declare our neural network to optimze type Mlp = ( diff --git a/examples/06-mnist.rs b/examples/06-mnist.rs index c5a3527d4..116ef0152 100644 --- a/examples/06-mnist.rs +++ b/examples/06-mnist.rs @@ -2,6 +2,7 @@ //! to build a neural network that learns to recognize //! the MNIST digits. +use dfdx::data::SubsetIterator; use dfdx::prelude::*; use indicatif::ProgressBar; use mnist::*; diff --git a/examples/08-tensor-broadcast-reduce.rs b/examples/08-tensor-broadcast-reduce.rs index 02f3505fe..15728ccae 100644 --- a/examples/08-tensor-broadcast-reduce.rs +++ b/examples/08-tensor-broadcast-reduce.rs @@ -1,8 +1,8 @@ //! Demonstrates broadcasting tensors to different sizes, and axis reductions //! with BroadcastTo and ReduceTo -use dfdx::arrays::Axis; -use dfdx::tensor::{tensor, HasArrayData, Tensor1D, Tensor2D, Tensor4D}; +use dfdx::arrays::{Axis, HasArrayData}; +use dfdx::tensor::{tensor, Tensor1D, Tensor2D, Tensor4D}; use dfdx::tensor_ops::BroadcastTo; fn main() { diff --git a/examples/10-tensor-index.rs b/examples/10-tensor-index.rs index 527e7ea97..7293b6f40 100644 --- a/examples/10-tensor-index.rs +++ b/examples/10-tensor-index.rs @@ -1,6 +1,7 @@ //! Demonstrates how to select sub tensors (index) from tensors -use dfdx::tensor::{tensor, HasArrayData, Tensor2D, Tensor3D}; +use dfdx::arrays::HasArrayData; +use dfdx::tensor::{tensor, Tensor2D, Tensor3D}; use dfdx::tensor_ops::Select1; fn main() { diff --git a/src/arrays.rs b/src/arrays.rs index 427a70dd9..8fa2873e1 100644 --- a/src/arrays.rs +++ b/src/arrays.rs @@ -180,6 +180,12 @@ pub trait HasArrayType { + HasLastAxis; } +/// Something that has [HasArrayType], and also can return a reference to or mutate `Self::Array`. +pub trait HasArrayData: HasArrayType { + fn data(&self) -> &Self::Array; + fn mut_data(&mut self) -> &mut Self::Array; +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/data.rs b/src/data.rs index 96b38b270..20cc1c795 100644 --- a/src/data.rs +++ b/src/data.rs @@ -1,19 +1,21 @@ //! A collection of data utility classes such as [one_hot_encode()] and [SubsetIterator]. -use crate::prelude::*; use rand::prelude::SliceRandom; +use crate::arrays::HasArrayData; +use crate::tensor::{Tensor1D, Tensor2D, TensorCreator}; + /// Generates a tensor with ordered data from 0 to `N`. /// /// Examples: /// ```rust -/// # use dfdx::prelude::*; +/// # use dfdx::{prelude::*, data::arange}; /// let t = arange::<5>(); /// assert_eq!(t.data(), &[0.0, 1.0, 2.0, 3.0, 4.0]); /// ``` /// /// ```rust -/// # use dfdx::prelude::*; +/// # use dfdx::{prelude::*, data::arange}; /// let t: Tensor1D<10> = arange(); /// assert_eq!(t.data(), &[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]); /// ``` @@ -39,8 +41,7 @@ pub fn arange() -> Tensor1D { /// /// Examples: /// ```rust -/// # use dfdx::prelude::*; -/// +/// # use dfdx::{prelude::*, data::one_hot_encode}; /// let class_labels = [0, 1, 2, 1, 1]; /// // NOTE: 5 is the batch size, 3 is the number of classes /// let probs = one_hot_encode::<5, 3>(&class_labels); @@ -68,14 +69,14 @@ pub fn one_hot_encode(class_labels: &[usize; B]) /// /// Iterating a dataset in order: /// ```rust -/// # use dfdx::prelude::*; +/// # use dfdx::{prelude::*, data::SubsetIterator}; /// let mut subsets = SubsetIterator::<5>::in_order(100); /// assert_eq!(subsets.next(), Some([0, 1, 2, 3, 4])); /// ``` /// /// Iterating a dataset in random order: /// ```rust -/// # use dfdx::prelude::*; +/// # use dfdx::{prelude::*, data::SubsetIterator}; /// # use rand::prelude::*; /// let mut rng = StdRng::seed_from_u64(0); /// let mut subsets = SubsetIterator::<5>::shuffled(100, &mut rng); diff --git a/src/devices/fill.rs b/src/devices/fill.rs index cf7f4acf6..a37f0db33 100644 --- a/src/devices/fill.rs +++ b/src/devices/fill.rs @@ -32,7 +32,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::prelude::ZeroElements; + use crate::arrays::ZeroElements; use rand::{thread_rng, Rng}; #[test] diff --git a/src/devices/foreach.rs b/src/devices/foreach.rs index 337c3c786..b3157936c 100644 --- a/src/devices/foreach.rs +++ b/src/devices/foreach.rs @@ -12,7 +12,7 @@ use crate::arrays::CountElements; /// /// Examples: /// ```rust -/// # use dfdx::prelude::*; +/// # use dfdx::devices::{Cpu, ForEachElement}; /// let mut a = [[0.0; 3]; 2]; /// let b = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]; /// Cpu::foreach_mr(&mut a, &b, &mut |x, y| { diff --git a/src/devices/permute.rs b/src/devices/permute.rs index cd2aa9fc3..3df16a4dc 100644 --- a/src/devices/permute.rs +++ b/src/devices/permute.rs @@ -217,7 +217,7 @@ permutations!([0, 1, 2, 3]); #[cfg(test)] mod tests { use super::*; - use crate::prelude::FillElements; + use crate::devices::FillElements; use rand::{thread_rng, Rng}; #[test] diff --git a/src/gradients.rs b/src/gradients.rs index c472a3fe0..f74e8260b 100644 --- a/src/gradients.rs +++ b/src/gradients.rs @@ -1,8 +1,11 @@ //! Implementations of [GradientTape] and generic Nd array containers via [Gradients]. -use crate::prelude::*; use std::collections::HashMap; +use crate::arrays::HasArrayType; +use crate::devices::{AllocateZeros, HasDevice}; +use crate::unique_id::{HasUniqueId, UniqueId}; + /// Records gradient computations to execute later. /// /// The only two things you can do with this are: @@ -129,7 +132,7 @@ impl Gradients { /// /// Examples: /// ```rust - /// # use dfdx::prelude::*; + /// # use dfdx::{prelude::*, gradients::*}; /// let a = Tensor1D::new([1.0, 2.0, 3.0]); /// let b: Tensor1D<5> = Tensor1D::zeros(); /// let mut gradients: Gradients = Default::default(); @@ -183,7 +186,7 @@ impl Gradients { /// /// Example usage: /// ``` - /// # use dfdx::prelude::*; + /// # use dfdx::{prelude::*, gradients::*}; /// let t = Tensor1D::new([1.0, 2.0, 3.0]); /// let mut gradients: Gradients = Default::default(); /// *gradients.mut_gradient(&t) = [-4.0, 5.0, -6.0]; @@ -202,7 +205,7 @@ impl Gradients { /// /// Example usage: /// ``` - /// # use dfdx::prelude::*; + /// # use dfdx::{prelude::*, gradients::*}; /// let t = Tensor1D::new([1.0, 2.0, 3.0]); /// let mut gradients: Gradients = Default::default(); /// let g: &mut [f32; 3] = gradients.mut_gradient(&t); @@ -231,7 +234,7 @@ impl Gradients { /// /// # Example usage: /// ``` - /// # use dfdx::prelude::*; + /// # use dfdx::{prelude::*, gradients::*}; /// let t = Tensor1D::new([1.0, 2.0, 3.0]); /// let mut gradients: Gradients = Default::default(); /// gradients.mut_gradient(&t); @@ -303,6 +306,8 @@ impl UnusedTensors { #[cfg(test)] mod tests { use super::*; + use crate::devices::Cpu; + use crate::unique_id::unique_id; struct Tensor { id: UniqueId, diff --git a/src/lib.rs b/src/lib.rs index 6bd6c95ec..2ad232282 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,7 +63,7 @@ //! //! 6. Compute gradients with [crate::tensor_ops::backward()]. See [crate::tensor_ops]. //! ```rust -//! # use dfdx::prelude::*; +//! # use dfdx::{prelude::*, gradients::Gradients}; //! # let mut rng = rand::thread_rng(); //! # let model: Linear<10, 5> = Default::default(); //! # let y_true: Tensor1D<5> = Tensor1D::randn(&mut rng).softmax(); @@ -76,7 +76,7 @@ //! ``` //! 7. Use an optimizer from [crate::optim] to optimize your network! //! ```rust -//! # use dfdx::prelude::*; +//! # use dfdx::{prelude::*, gradients::Gradients}; //! # let mut rng = rand::thread_rng(); //! # let mut model: Linear<10, 5> = Default::default(); //! # let x: Tensor1D<10> = Tensor1D::zeros(); @@ -108,18 +108,14 @@ pub mod unique_id; /// Contains all public exports. pub mod prelude { - pub use crate::arrays::*; - pub use crate::data::*; - pub use crate::devices::*; - pub use crate::gradients::*; + pub use crate::arrays::{AllAxes, Axes2, Axes3, Axes4, Axis, HasArrayData}; + pub use crate::devices::HasDevice; + pub use crate::gradients::{NoneTape, OwnedTape}; pub use crate::losses::*; pub use crate::nn::*; pub use crate::optim::*; pub use crate::tensor::*; pub use crate::tensor_ops::*; - pub use crate::unique_id::*; - - pub use crate::{Assert, ConstTrue}; } #[cfg(not(any( diff --git a/src/losses.rs b/src/losses.rs index 43820290e..d4221f48d 100644 --- a/src/losses.rs +++ b/src/losses.rs @@ -1,6 +1,10 @@ //! Standard loss functions such as [mse_loss()], [cross_entropy_with_logits_loss()], and more. -use crate::prelude::*; +use crate::arrays::{AllAxes, HasArrayType, HasLastAxis}; +use crate::tensor::Tensor; +use crate::tensor_ops::{ + abs, div_scalar, ln, log_softmax, mean, mul, mul_scalar, negate, sqrt, square, sub, Reduce, +}; /// [Mean Squared Error](https://en.wikipedia.org/wiki/Mean_squared_error). /// This computes `(&targ - pred).square().mean()`. @@ -190,9 +194,9 @@ pub fn binary_cross_entropy_with_logits_loss>( #[cfg(test)] mod tests { - use crate::tests::assert_close; - use super::*; + use crate::prelude::*; + use crate::tests::assert_close; #[test] fn test_mse() { diff --git a/src/nn/activations.rs b/src/nn/activations.rs index 03d64ae20..696461927 100644 --- a/src/nn/activations.rs +++ b/src/nn/activations.rs @@ -1,3 +1,5 @@ +use crate::arrays::{HasArrayType, HasLastAxis}; +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; use rand::Rng; @@ -161,7 +163,7 @@ mod tests { let t = Tensor2D::new([[-2.0, -1.0, 0.0], [1.0, 2.0, 3.0]]); let r1 = Softmax.forward(t.clone()); - let r2 = t.softmax::>(); + let r2 = t.softmax::>(); assert_eq!(r1.data(), r2.data()); } } diff --git a/src/nn/conv.rs b/src/nn/conv.rs index 0fefd0dc8..3a1a352ec 100644 --- a/src/nn/conv.rs +++ b/src/nn/conv.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, Tape, UnusedTensors}; use crate::prelude::*; use rand::Rng; use rand_distr::Uniform; diff --git a/src/nn/dropout.rs b/src/nn/dropout.rs index 431241530..7d57e0586 100644 --- a/src/nn/dropout.rs +++ b/src/nn/dropout.rs @@ -1,4 +1,6 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; +use crate::unique_id::unique_id; use rand::{prelude::StdRng, Rng, SeedableRng}; use std::{cell::RefCell, ops::DerefMut}; diff --git a/src/nn/flatten.rs b/src/nn/flatten.rs index 7873237ce..14f93b02d 100644 --- a/src/nn/flatten.rs +++ b/src/nn/flatten.rs @@ -1,4 +1,6 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, Tape, UnusedTensors}; use crate::prelude::*; +use crate::{Assert, ConstTrue}; /// **Requires Nightly** Flattens 3d tensors to 1d, and 4d tensors to 2d. /// diff --git a/src/nn/generalized_residual.rs b/src/nn/generalized_residual.rs index 17bfab42f..f77c82e18 100644 --- a/src/nn/generalized_residual.rs +++ b/src/nn/generalized_residual.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; /// A residual connection `R` around `F`: `F(x) + R(x)`, diff --git a/src/nn/impl_module_for_tuples.rs b/src/nn/impl_module_for_tuples.rs index 7d3e16811..45e879c82 100644 --- a/src/nn/impl_module_for_tuples.rs +++ b/src/nn/impl_module_for_tuples.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; use rand::prelude::Rng; use std::io::{Read, Seek, Write}; @@ -93,6 +94,7 @@ tuple_impls!([A, B, C, D, E, F] [0, 1, 2, 3, 4, 5], F, [E, D, C, B, A]); mod tests { use super::*; use crate::nn::tests::SimpleGradients; + use crate::unique_id::HasUniqueId; use rand::{prelude::StdRng, SeedableRng}; use std::fs::File; use tempfile::NamedTempFile; diff --git a/src/nn/layer_norm.rs b/src/nn/layer_norm.rs index 633708286..df0f1bb4c 100644 --- a/src/nn/layer_norm.rs +++ b/src/nn/layer_norm.rs @@ -1,3 +1,6 @@ +use crate::arrays::Axis; +use crate::devices::{Cpu, FillElements}; +use crate::gradients::{CanUpdateWithGradients, GradientProvider, Tape, UnusedTensors}; use crate::prelude::*; use std::io::{Read, Seek, Write}; use zip::{result::ZipResult, ZipArchive}; @@ -21,8 +24,8 @@ use zip::{result::ZipResult, ZipArchive}; /// ``` #[derive(Debug, Clone)] pub struct LayerNorm1D { - pub gamma: Tensor1D, - pub beta: Tensor1D, + pub gamma: Tensor1D, + pub beta: Tensor1D, pub epsilon: f32, } @@ -125,6 +128,7 @@ impl LoadFromNpz for LayerNorm1D { mod tests { use super::*; use crate::nn::tests::SimpleGradients; + use crate::unique_id::HasUniqueId; use rand::{prelude::StdRng, SeedableRng}; use rand_distr::Standard; use std::fs::File; diff --git a/src/nn/linear.rs b/src/nn/linear.rs index 210e590e9..209a25036 100644 --- a/src/nn/linear.rs +++ b/src/nn/linear.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, Tape, UnusedTensors}; use crate::prelude::*; use rand::Rng; use rand_distr::Uniform; @@ -25,10 +26,10 @@ use zip::{result::ZipResult, ZipArchive, ZipWriter}; #[derive(Default, Debug, Clone)] pub struct Linear { /// Transposed weight matrix, shape (O, I) - pub weight: Tensor2D, + pub weight: Tensor2D, /// Bias vector, shape (O, ) - pub bias: Tensor1D, + pub bias: Tensor1D, } impl CanUpdateWithGradients for Linear { @@ -113,6 +114,7 @@ impl #[cfg(test)] mod tests { use super::*; + use crate::unique_id::HasUniqueId; use crate::{nn::tests::SimpleGradients, tests::assert_close}; use rand::{prelude::StdRng, SeedableRng}; use std::fs::File; diff --git a/src/nn/mod.rs b/src/nn/mod.rs index 7e443f3ee..4864610c7 100644 --- a/src/nn/mod.rs +++ b/src/nn/mod.rs @@ -118,7 +118,8 @@ pub use conv::*; #[cfg(test)] mod tests { - use crate::prelude::{GradientProvider, Gradients}; + use crate::gradients::{GradientProvider, Gradients}; + use crate::unique_id::HasUniqueId; #[derive(Default)] pub struct SimpleGradients(pub Gradients); @@ -126,9 +127,7 @@ mod tests { impl GradientProvider for SimpleGradients { fn gradient

(&mut self, p: &P) -> Option> where - P: crate::prelude::HasUniqueId - + crate::prelude::HasArrayType - + crate::prelude::HasDevice, + P: HasUniqueId + crate::arrays::HasArrayType + crate::devices::HasDevice, { self.0.remove(p) } diff --git a/src/nn/module.rs b/src/nn/module.rs index 260fc6d56..ce0283422 100644 --- a/src/nn/module.rs +++ b/src/nn/module.rs @@ -1,4 +1,4 @@ -use crate::prelude::CanUpdateWithGradients; +use crate::gradients::CanUpdateWithGradients; /// A unit of a neural network. Acts on the generic `Input` /// and produces `Module::Output`. @@ -38,7 +38,7 @@ pub trait Module: ResetParams + CanUpdateWithGradients { /// # Example Implementation /// /// ```rust - /// # use dfdx::prelude::*; + /// # use dfdx::{prelude::*, gradients::*}; /// struct MyMulLayer { /// scale: Tensor1D<5, NoneTape>, /// } diff --git a/src/nn/repeated.rs b/src/nn/repeated.rs index 8762efe14..064296dda 100644 --- a/src/nn/repeated.rs +++ b/src/nn/repeated.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; use std::io::{Read, Seek, Write}; use zip::{result::ZipResult, ZipArchive, ZipWriter}; @@ -98,6 +99,7 @@ impl, const N: usize> Module for mod tests { use super::*; use crate::nn::tests::SimpleGradients; + use crate::unique_id::HasUniqueId; use rand::{prelude::StdRng, SeedableRng}; use std::fs::File; use tempfile::NamedTempFile; diff --git a/src/nn/residual.rs b/src/nn/residual.rs index 3676d495d..8d0207aec 100644 --- a/src/nn/residual.rs +++ b/src/nn/residual.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; /// A residual connection around `F`: `F(x) + x`, @@ -74,7 +75,7 @@ impl LoadFromNpz for Residual { #[cfg(test)] mod tests { use super::*; - use crate::{nn::tests::SimpleGradients, tests::assert_close}; + use crate::{nn::tests::SimpleGradients, tests::assert_close, unique_id::HasUniqueId}; use rand::{prelude::StdRng, SeedableRng}; use std::fs::File; use tempfile::NamedTempFile; diff --git a/src/nn/split_into.rs b/src/nn/split_into.rs index 4490d3870..be0a5196f 100644 --- a/src/nn/split_into.rs +++ b/src/nn/split_into.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; /// Splits input into multiple heads. `T` should be a tuple, @@ -88,7 +89,7 @@ tuple_impls!([A, B, C, D, E] F); #[cfg(test)] mod tests { use super::*; - use crate::nn::tests::SimpleGradients; + use crate::{nn::tests::SimpleGradients, unique_id::HasUniqueId}; #[test] fn test_split_into_2() { diff --git a/src/nn/transformer/decoder.rs b/src/nn/transformer/decoder.rs index fb3c7236b..dfb05abde 100644 --- a/src/nn/transformer/decoder.rs +++ b/src/nn/transformer/decoder.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; use rand::Rng; use std::io::{Read, Seek, Write}; diff --git a/src/nn/transformer/encoder.rs b/src/nn/transformer/encoder.rs index 99e555148..fba29c84b 100644 --- a/src/nn/transformer/encoder.rs +++ b/src/nn/transformer/encoder.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; use std::io::{Read, Seek, Write}; use zip::{result::ZipResult, ZipArchive, ZipWriter}; diff --git a/src/nn/transformer/mha.rs b/src/nn/transformer/mha.rs index 791cafc95..addd6b592 100644 --- a/src/nn/transformer/mha.rs +++ b/src/nn/transformer/mha.rs @@ -1,4 +1,6 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, Tape, UnusedTensors}; use crate::prelude::*; +use crate::{Assert, ConstTrue}; use rand::Rng; /// **Requires Nightly** A multi-head attention layer. diff --git a/src/nn/transformer/transformer.rs b/src/nn/transformer/transformer.rs index 13a868553..908d68306 100644 --- a/src/nn/transformer/transformer.rs +++ b/src/nn/transformer/transformer.rs @@ -1,3 +1,4 @@ +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; use std::io::{Read, Seek, Write}; use zip::{result::ZipResult, ZipArchive, ZipWriter}; diff --git a/src/optim/adam.rs b/src/optim/adam.rs index 4c46a9647..46d99287d 100644 --- a/src/optim/adam.rs +++ b/src/optim/adam.rs @@ -1,4 +1,8 @@ +use crate::arrays::HasArrayType; +use crate::devices::ForEachElement; +use crate::gradients::{CanUpdateWithGradients, GradientProvider, Gradients}; use crate::prelude::*; +use crate::unique_id::HasUniqueId; use std::marker::PhantomData; /// An implementation of the Adam optimizer from diff --git a/src/optim/mod.rs b/src/optim/mod.rs index 2c7b0b554..9e41e579b 100644 --- a/src/optim/mod.rs +++ b/src/optim/mod.rs @@ -14,7 +14,7 @@ //! the [crate::gradients::Gradients]: //! //! ```rust -//! # use dfdx::prelude::*; +//! # use dfdx::{prelude::*, gradients::*}; //! # type MyModel = Linear<5, 2>; //! let mut model: MyModel = Default::default(); //! let mut opt: Sgd = Default::default(); diff --git a/src/optim/optimizer.rs b/src/optim/optimizer.rs index 45792e0b7..4733f4e35 100644 --- a/src/optim/optimizer.rs +++ b/src/optim/optimizer.rs @@ -1,4 +1,4 @@ -use crate::prelude::{CanUpdateWithGradients, Gradients, UnusedTensors}; +use crate::gradients::{CanUpdateWithGradients, Gradients, UnusedTensors}; /// All optimizers must implement the update function, which takes an object /// that implements [CanUpdateWithGradients], and calls [CanUpdateWithGradients::update]. diff --git a/src/optim/rmsprop.rs b/src/optim/rmsprop.rs index 486b4a8fc..30235b591 100644 --- a/src/optim/rmsprop.rs +++ b/src/optim/rmsprop.rs @@ -1,4 +1,8 @@ +use crate::arrays::HasArrayType; +use crate::devices::{FillElements, ForEachElement}; +use crate::gradients::{CanUpdateWithGradients, GradientProvider, Gradients}; use crate::prelude::*; +use crate::unique_id::HasUniqueId; use std::marker::PhantomData; /// RMSprop As described in [Hinton, 2012](http://www.cs.toronto.edu/%7Etijmen/csc321/slides/lecture_slides_lec6.pdf). diff --git a/src/optim/sgd.rs b/src/optim/sgd.rs index 64b81b8fe..5a001290c 100644 --- a/src/optim/sgd.rs +++ b/src/optim/sgd.rs @@ -1,4 +1,8 @@ +use crate::arrays::HasArrayType; +use crate::devices::ForEachElement; +use crate::gradients::{CanUpdateWithGradients, GradientProvider, Gradients}; use crate::prelude::*; +use crate::unique_id::HasUniqueId; use std::marker::PhantomData; /// Implementation of Stochastic Gradient Descent. Based on [pytorch's implementation](https://pytorch.org/docs/stable/generated/torch.optim.SGD.html) diff --git a/src/tensor/impl_has_array.rs b/src/tensor/impl_has_array.rs index 5fb8b3df4..0832763cd 100644 --- a/src/tensor/impl_has_array.rs +++ b/src/tensor/impl_has_array.rs @@ -1,11 +1,5 @@ use super::*; -use crate::prelude::*; - -/// Something that has [HasArrayType], and also can return a reference to or mutate `Self::Array`. -pub trait HasArrayData: HasArrayType { - fn data(&self) -> &Self::Array; - fn mut_data(&mut self) -> &mut Self::Array; -} +use crate::arrays::{HasArrayData, HasArrayType}; macro_rules! tensor_impl { ($typename:ident, [$($Vs:tt),*], $arr:ty) => { diff --git a/src/tensor/impl_phantom.rs b/src/tensor/impl_phantom.rs index 39e6f8557..d63a06aeb 100644 --- a/src/tensor/impl_phantom.rs +++ b/src/tensor/impl_phantom.rs @@ -1,4 +1,6 @@ +use crate::arrays::HasArrayType; use crate::prelude::*; +use crate::unique_id::{HasUniqueId, UniqueId}; use std::marker::PhantomData; /// A fake tensor that holds a [UniqueId] and a type `T` that is [HasArrayType]. diff --git a/src/tensor/impl_randomize.rs b/src/tensor/impl_randomize.rs index de236e759..8e7c7d68e 100644 --- a/src/tensor/impl_randomize.rs +++ b/src/tensor/impl_randomize.rs @@ -1,3 +1,4 @@ +use crate::devices::FillElements; use crate::prelude::*; use rand::{distributions::Distribution, Rng}; diff --git a/src/tensor/impl_tensor.rs b/src/tensor/impl_tensor.rs index 25e3ac5f4..3880ca24d 100644 --- a/src/tensor/impl_tensor.rs +++ b/src/tensor/impl_tensor.rs @@ -1,4 +1,7 @@ +use crate::arrays::HasArrayType; +use crate::gradients::{CanUpdateWithGradients, NoneTape, Tape}; use crate::prelude::*; +use crate::unique_id::{unique_id, HasUniqueId}; /// The main tensor trait. A tensor consists of mainly 1. an array, 2. a device, 3. a unique id. pub trait Tensor: diff --git a/src/tensor/impl_tensor_creator.rs b/src/tensor/impl_tensor_creator.rs index 5b23826d5..f9e44aba8 100644 --- a/src/tensor/impl_tensor_creator.rs +++ b/src/tensor/impl_tensor_creator.rs @@ -1,5 +1,8 @@ use super::*; +use crate::devices::{AllocateZeros, FillElements}; +use crate::gradients::NoneTape; use crate::prelude::*; +use crate::unique_id::unique_id; use rand::prelude::Distribution; use rand_distr::{num_traits::One, Standard, StandardNormal}; @@ -68,11 +71,10 @@ tensor_impl!(Tensor4D, [M, N, O, P]); #[cfg(test)] mod tests { - use std::collections::HashSet; - use super::*; - use crate::unique_id::unique_id; + use crate::unique_id::UniqueId; use rand::thread_rng; + use std::collections::HashSet; #[test] fn test_id() { diff --git a/src/tensor/impl_update_with_grads.rs b/src/tensor/impl_update_with_grads.rs index a02de73df..31c297306 100644 --- a/src/tensor/impl_update_with_grads.rs +++ b/src/tensor/impl_update_with_grads.rs @@ -1,3 +1,5 @@ +use crate::devices::Device; +use crate::gradients::{CanUpdateWithGradients, GradientProvider, UnusedTensors}; use crate::prelude::*; impl> CanUpdateWithGradients for T { diff --git a/src/tensor_ops/arith_scalar.rs b/src/tensor_ops/arith_scalar.rs index a95b08d74..c46d8ce4f 100644 --- a/src/tensor_ops/arith_scalar.rs +++ b/src/tensor_ops/arith_scalar.rs @@ -1,5 +1,9 @@ use super::utils::move_tape_and_add_backward_op; -use crate::prelude::*; +use crate::gradients::Tape; +use crate::{ + devices::{Device, ForEachElement}, + prelude::*, +}; use std::ops::{Add, Div, Mul, Sub}; /// `t + val`. `val` is used for all elements of `t`. diff --git a/src/tensor_ops/conv.rs b/src/tensor_ops/conv.rs index 38fc7960e..32647d2f4 100644 --- a/src/tensor_ops/conv.rs +++ b/src/tensor_ops/conv.rs @@ -1,3 +1,4 @@ +use crate::gradients::Tape; use crate::prelude::*; /// **Requires Nightly** Perform a 2d convolution. diff --git a/src/tensor_ops/impl_add.rs b/src/tensor_ops/impl_add.rs index 1f491231f..c0fd19be4 100644 --- a/src/tensor_ops/impl_add.rs +++ b/src/tensor_ops/impl_add.rs @@ -1,4 +1,5 @@ use super::utils::binary_map; +use crate::gradients::{NoneTape, Tape}; use crate::prelude::*; /// Element wise addition. diff --git a/src/tensor_ops/impl_backward.rs b/src/tensor_ops/impl_backward.rs index 99c1f3ccc..da1528412 100644 --- a/src/tensor_ops/impl_backward.rs +++ b/src/tensor_ops/impl_backward.rs @@ -1,3 +1,5 @@ +use crate::devices::{Cpu, FillElements}; +use crate::gradients::{Gradients, Tape}; use crate::prelude::*; /// Runs backprop algorithm with all operations contained in the tape that `t` has. diff --git a/src/tensor_ops/impl_broadcast_reduce.rs b/src/tensor_ops/impl_broadcast_reduce.rs index 9cd2ed960..fca03d485 100644 --- a/src/tensor_ops/impl_broadcast_reduce.rs +++ b/src/tensor_ops/impl_broadcast_reduce.rs @@ -1,6 +1,7 @@ use super::utils::move_tape_and_add_backward_op; -use crate::arrays::{AllAxes, Axes2, Axes3, Axis}; -use crate::devices::{AddAccum, CopyAccum, DeviceReduce}; +use crate::arrays::{AllAxes, Axes2, Axes3, Axis, HasArrayType}; +use crate::devices::{AddAccum, CopyAccum, Cpu, DeviceReduce}; +use crate::gradients::Tape; use crate::prelude::*; /// Broadcast self into `T` along `Axes`. Opposite of [Reduce]. diff --git a/src/tensor_ops/impl_clamp.rs b/src/tensor_ops/impl_clamp.rs index 838c15940..614c928b0 100644 --- a/src/tensor_ops/impl_clamp.rs +++ b/src/tensor_ops/impl_clamp.rs @@ -1,3 +1,4 @@ +use crate::gradients::Tape; use crate::prelude::*; /// Clamp all elements between the provided min and max values. diff --git a/src/tensor_ops/impl_div.rs b/src/tensor_ops/impl_div.rs index e83d14dc4..db8f3d774 100644 --- a/src/tensor_ops/impl_div.rs +++ b/src/tensor_ops/impl_div.rs @@ -1,4 +1,5 @@ use super::utils::binary_map; +use crate::gradients::Tape; use crate::prelude::*; /// Element wise division. diff --git a/src/tensor_ops/impl_dropout.rs b/src/tensor_ops/impl_dropout.rs index 2f0ec4706..667349209 100644 --- a/src/tensor_ops/impl_dropout.rs +++ b/src/tensor_ops/impl_dropout.rs @@ -1,3 +1,4 @@ +use crate::gradients::Tape; use crate::prelude::*; use rand::{rngs::StdRng, Rng, SeedableRng}; use rand_distr::Standard; diff --git a/src/tensor_ops/impl_mask.rs b/src/tensor_ops/impl_mask.rs index 75d1dc8c8..4daddf4b0 100644 --- a/src/tensor_ops/impl_mask.rs +++ b/src/tensor_ops/impl_mask.rs @@ -1,4 +1,6 @@ use super::utils::move_tape_and_add_backward_op; +use crate::devices::{Device, ForEachElement}; +use crate::gradients::Tape; use crate::prelude::*; /// Sets `t` to `value` anywhere `mask` equals value diff --git a/src/tensor_ops/impl_max.rs b/src/tensor_ops/impl_max.rs index ae5b11648..11baea8cc 100644 --- a/src/tensor_ops/impl_max.rs +++ b/src/tensor_ops/impl_max.rs @@ -1,5 +1,6 @@ use super::utils::move_tape_and_add_backward_op; -use crate::devices::{DeviceReduce, EqAccum, MaxAccum, MulAccum}; +use crate::devices::{Device, DeviceReduce, EqAccum, MaxAccum, MulAccum}; +use crate::gradients::Tape; use crate::prelude::*; /// Reduces `Axes` of the tensor by gathering the maximum value from that dimension. diff --git a/src/tensor_ops/impl_maximum.rs b/src/tensor_ops/impl_maximum.rs index d58948a76..5ab4099ac 100644 --- a/src/tensor_ops/impl_maximum.rs +++ b/src/tensor_ops/impl_maximum.rs @@ -1,4 +1,5 @@ use super::utils::binary_map; +use crate::gradients::Tape; use crate::prelude::*; /// Element wise maximum. diff --git a/src/tensor_ops/impl_mean.rs b/src/tensor_ops/impl_mean.rs index c2fe2ab5d..29643e6bb 100644 --- a/src/tensor_ops/impl_mean.rs +++ b/src/tensor_ops/impl_mean.rs @@ -1,3 +1,5 @@ +use crate::arrays::{HasArrayType, HasAxes}; +use crate::gradients::Tape; use crate::prelude::*; /// Average the values along `Axes` of `T`. diff --git a/src/tensor_ops/impl_min.rs b/src/tensor_ops/impl_min.rs index 69a401e06..b1e6dbaeb 100644 --- a/src/tensor_ops/impl_min.rs +++ b/src/tensor_ops/impl_min.rs @@ -1,5 +1,6 @@ use super::utils::move_tape_and_add_backward_op; -use crate::devices::{DeviceReduce, EqAccum, MinAccum, MulAccum}; +use crate::devices::{Device, DeviceReduce, EqAccum, MinAccum, MulAccum}; +use crate::gradients::Tape; use crate::prelude::*; /// Reduces `Axes` of the tensor by gathering the minimum value from the axes. diff --git a/src/tensor_ops/impl_minimum.rs b/src/tensor_ops/impl_minimum.rs index 662ad53d7..9af5fc78f 100644 --- a/src/tensor_ops/impl_minimum.rs +++ b/src/tensor_ops/impl_minimum.rs @@ -1,4 +1,5 @@ use super::utils::binary_map; +use crate::gradients::Tape; use crate::prelude::*; /// Element wise minimum. diff --git a/src/tensor_ops/impl_mul.rs b/src/tensor_ops/impl_mul.rs index efbdf8b40..57b061152 100644 --- a/src/tensor_ops/impl_mul.rs +++ b/src/tensor_ops/impl_mul.rs @@ -1,4 +1,5 @@ use super::utils::binary_map; +use crate::gradients::Tape; use crate::prelude::*; /// Element wise multiplication. diff --git a/src/tensor_ops/impl_nans.rs b/src/tensor_ops/impl_nans.rs index fe4d45c5b..c8ab38b36 100644 --- a/src/tensor_ops/impl_nans.rs +++ b/src/tensor_ops/impl_nans.rs @@ -1,3 +1,4 @@ +use crate::gradients::Tape; use crate::prelude::*; /// Replaces any [std::f32::NAN] with `value`. diff --git a/src/tensor_ops/impl_normalize.rs b/src/tensor_ops/impl_normalize.rs index 73a0440a3..cc78a8f77 100644 --- a/src/tensor_ops/impl_normalize.rs +++ b/src/tensor_ops/impl_normalize.rs @@ -1,3 +1,5 @@ +use crate::arrays::{HasArrayType, HasAxes}; +use crate::gradients::Tape; use crate::prelude::*; /// Normalizes `t` to have mean `0.0` and stddev `1.0` along `Axes` of `T`. `epsilon` is passed to [stddev()]. diff --git a/src/tensor_ops/impl_pow.rs b/src/tensor_ops/impl_pow.rs index 4b4db266f..e3ea59071 100644 --- a/src/tensor_ops/impl_pow.rs +++ b/src/tensor_ops/impl_pow.rs @@ -1,4 +1,5 @@ use super::utils::map; +use crate::gradients::Tape; use crate::prelude::*; /// Raises to a float power. `t^i`. diff --git a/src/tensor_ops/impl_reshape.rs b/src/tensor_ops/impl_reshape.rs index 48fcdafcf..8e1f77ee1 100644 --- a/src/tensor_ops/impl_reshape.rs +++ b/src/tensor_ops/impl_reshape.rs @@ -1,5 +1,9 @@ use super::utils::move_tape_and_add_backward_op; +use crate::arrays::CountElements; +use crate::devices::Device; +use crate::gradients::Tape; use crate::prelude::*; +use crate::{Assert, ConstTrue}; /// **Requires Nightly** Reshape `Self` into `T`. pub trait Reshape { diff --git a/src/tensor_ops/impl_softmax.rs b/src/tensor_ops/impl_softmax.rs index 99cf285f4..529d80faa 100644 --- a/src/tensor_ops/impl_softmax.rs +++ b/src/tensor_ops/impl_softmax.rs @@ -1,4 +1,5 @@ -use crate::devices::{DeviceReduce, MaxAccum, SubAccum}; +use crate::devices::{Device, DeviceReduce, MaxAccum, SubAccum}; +use crate::gradients::Tape; use crate::prelude::*; /// Computes the [LogSumExp](https://en.wikipedia.org/wiki/LogSumExp) function across diff --git a/src/tensor_ops/impl_stddev.rs b/src/tensor_ops/impl_stddev.rs index 0d964bba2..28be84530 100644 --- a/src/tensor_ops/impl_stddev.rs +++ b/src/tensor_ops/impl_stddev.rs @@ -1,3 +1,5 @@ +use crate::arrays::{HasArrayType, HasAxes}; +use crate::gradients::Tape; use crate::prelude::*; /// Reduces `Axes` of `T` by computing std deviation of all values in those axes. diff --git a/src/tensor_ops/impl_sub.rs b/src/tensor_ops/impl_sub.rs index 5dc4afd24..425217872 100644 --- a/src/tensor_ops/impl_sub.rs +++ b/src/tensor_ops/impl_sub.rs @@ -1,4 +1,5 @@ use super::utils::binary_map; +use crate::gradients::Tape; use crate::prelude::*; /// Element wise subtraction. diff --git a/src/tensor_ops/impl_sum.rs b/src/tensor_ops/impl_sum.rs index 8cd34cbff..914a23a2f 100644 --- a/src/tensor_ops/impl_sum.rs +++ b/src/tensor_ops/impl_sum.rs @@ -1,5 +1,6 @@ use super::utils::move_tape_and_add_backward_op; use crate::devices::{AddAccum, DeviceReduce}; +use crate::gradients::Tape; use crate::prelude::*; /// Sum values along axes `Axes` of `T`. diff --git a/src/tensor_ops/map.rs b/src/tensor_ops/map.rs index 387ad6ad5..b9ed58b7a 100644 --- a/src/tensor_ops/map.rs +++ b/src/tensor_ops/map.rs @@ -1,4 +1,5 @@ use super::utils::{map, map_df_uses_fx}; +use crate::gradients::Tape; use crate::prelude::*; use std::ops::Neg; diff --git a/src/tensor_ops/matmul.rs b/src/tensor_ops/matmul.rs index bf9897ca2..aca98277c 100644 --- a/src/tensor_ops/matmul.rs +++ b/src/tensor_ops/matmul.rs @@ -1,4 +1,6 @@ use super::utils::move_tape_and_add_backward_binop; +use crate::devices::{Cpu, MatMul, MatMulOp, Transpose}; +use crate::gradients::Tape; use crate::prelude::*; /// Matrix multiplication. This also supports batched matrix multiplication, @@ -248,7 +250,8 @@ pub fn vecmat_mul_transpose( #[cfg(test)] mod tests { use super::*; - use crate::tests::assert_close; + use crate::arrays::ZeroElements; + use crate::{devices::Device, tests::assert_close}; use rand::thread_rng; #[test] diff --git a/src/tensor_ops/permute.rs b/src/tensor_ops/permute.rs index bf468678b..8a3f98e68 100644 --- a/src/tensor_ops/permute.rs +++ b/src/tensor_ops/permute.rs @@ -1,4 +1,6 @@ use super::utils::move_tape_and_add_backward_op; +use crate::devices::{Cpu, Device, DevicePermute}; +use crate::gradients::Tape; use crate::prelude::*; /// Permutes self into `T` with the new order of axes specified via `Axes`. diff --git a/src/tensor_ops/select.rs b/src/tensor_ops/select.rs index c2256a650..a64818c4e 100644 --- a/src/tensor_ops/select.rs +++ b/src/tensor_ops/select.rs @@ -1,4 +1,8 @@ use super::utils::move_tape_and_add_backward_op; +use crate::devices::{ + BSelectAx1, Device, DeviceSelect, FillElements, SelectAx0, SelectAx1, SelectAx2, SelectAx3, +}; +use crate::gradients::Tape; use crate::prelude::*; /// Select values along a single axis `I` resulting in `T`. Equivalent diff --git a/src/tensor_ops/utils.rs b/src/tensor_ops/utils.rs index 833a36d9a..47e2d7e2a 100644 --- a/src/tensor_ops/utils.rs +++ b/src/tensor_ops/utils.rs @@ -7,6 +7,8 @@ //! 4. You can't really separate these operations since they are very inter-dependent. So it makes //! sense to have a single unit for doing it. +use crate::devices::{AllocateZeros, Device, ForEachElement}; +use crate::gradients::{Gradients, Tape}; use crate::prelude::*; /// `f(t)`. Applies a function `f` to every element of the [Tensor]. The derivative