Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Cuda device and skeleton cuda kernel impls #322

Merged
merged 16 commits into the base branch
Jan 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,17 @@ matrixmultiply = { version = "0.3.2", default-features = false }
zip = { version = "0.6.2", default-features = false, optional = true }
cblas-sys = { version = "0.1.4", default-features = false, optional = true }
libc = { version = "0.2", default-features = false, optional = true }
cudarc = { version = "0.5.0", default-features = false, optional = true }

[features]
default = ["std", "numpy"]
std = ["no-std-compat/std", "rand/std", "rand_distr/std"]
std = ["no-std-compat/std", "rand/std", "rand_distr/std", "cudarc?/std"]
nightly = []
numpy = ["dep:zip", "std"]
cblas = ["dep:cblas-sys", "dep:libc"]
intel-mkl = ["cblas"]
cuda = ["dep:cudarc"]
test-cuda = ["cuda"]

[dev-dependencies]
rand = "0.8.5"
Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,12 @@ pub fn keep_denormals() {
pub(crate) mod tests {
const TOLERANCE: f32 = 1e-6;

// Device the unit-test suite runs against: the CPU device by default, or the
// CUDA device when the `test-cuda` feature is enabled, so the same tests can
// exercise both backends without modification.
#[cfg(not(feature = "test-cuda"))]
pub type TestDevice = crate::tensor::Cpu;

#[cfg(feature = "test-cuda")]
pub type TestDevice = crate::tensor::Cuda;

pub trait AssertClose {
fn get_far_pair(&self, rhs: &Self, tolerance: f32) -> Option<(f32, f32)>;
fn assert_close(&self, rhs: &Self, tolerance: f32)
Expand Down
42 changes: 22 additions & 20 deletions src/nn/add_into.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ mod tests {
#[test]
fn test_add_into_3() {
let dev: TestDevice = Default::default();
let m: AddInto<(Linear<2, 5>, Linear<3, 5>, Linear<4, 5>)> = dev.build_module();
let m: AddInto<(Linear<2, 5, _>, Linear<3, 5, _>, Linear<4, 5, _>)> = dev.build_module();
let _: Tensor<Rank1<5>, _, _, OwnedTape<_>> = m.forward((
dev.zeros::<Rank1<2>>().traced(),
dev.zeros::<Rank1<3>>().traced(),
Expand All @@ -127,8 +127,12 @@ mod tests {
#[test]
fn test_add_into_4() {
let dev: TestDevice = Default::default();
type Model = AddInto<(Linear<2, 5>, Linear<3, 5>, Linear<4, 5>, Linear<5, 5>)>;
let m: Model = dev.build_module();
let m: AddInto<(
Linear<2, 5, _>,
Linear<3, 5, _>,
Linear<4, 5, _>,
Linear<5, 5, _>,
)> = dev.build_module();
let _: Tensor<Rank1<5>, _, _, OwnedTape<_>> = m.forward((
dev.zeros::<Rank1<2>>().traced(),
dev.zeros::<Rank1<3>>().traced(),
Expand All @@ -146,14 +150,13 @@ mod tests {
#[test]
fn test_add_into_5() {
let dev: TestDevice = Default::default();
type Model = AddInto<(
Linear<2, 5>,
Linear<3, 5>,
Linear<4, 5>,
Linear<5, 5>,
Linear<6, 5>,
)>;
let m: Model = dev.build_module();
let m: AddInto<(
Linear<2, 5, _>,
Linear<3, 5, _>,
Linear<4, 5, _>,
Linear<5, 5, _>,
Linear<6, 5, _>,
)> = dev.build_module();
let _: Tensor<Rank1<5>, _, _, OwnedTape<_>> = m.forward((
dev.zeros::<Rank1<2>>().traced(),
dev.zeros::<Rank1<3>>().traced(),
Expand All @@ -173,15 +176,14 @@ mod tests {
#[test]
fn test_add_into_6() {
let dev: TestDevice = Default::default();
type Model = AddInto<(
Linear<2, 5>,
Linear<3, 5>,
Linear<4, 5>,
Linear<5, 5>,
Linear<6, 5>,
Linear<7, 5>,
)>;
let m: Model = dev.build_module();
let m: AddInto<(
Linear<2, 5, _>,
Linear<3, 5, _>,
Linear<4, 5, _>,
Linear<5, 5, _>,
Linear<6, 5, _>,
Linear<7, 5, _>,
)> = dev.build_module();
let _: Tensor<Rank1<5>, _, _, OwnedTape<_>> = m.forward((
dev.zeros::<Rank1<2>>().traced(),
dev.zeros::<Rank1<3>>().traced(),
Expand Down
2 changes: 1 addition & 1 deletion src/nn/batchnorm2d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ mod tests {
fn test_batchnorm2d_3d_forward_mut() {
let dev = TestDevice::seed_from_u64(0);

let x1: Tensor<Rank3<3, 2, 2>> = dev.sample(rand_distr::StandardNormal);
let x1: Tensor<Rank3<3, 2, 2>, f32, _> = dev.sample(rand_distr::StandardNormal);
let mut bn: BatchNorm2D<3, _> = dev.build_module();

let y1 = bn.forward_mut(x1.trace());
Expand Down
12 changes: 6 additions & 6 deletions src/nn/impl_module_for_tuples.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ mod tests {

#[test]
fn test_set_to_1() {
let dev: TestDevice = Default::default();
let dev: Cpu = Default::default();
assert_eq!(
SetTo1::<0, 5>::default().forward(dev.zeros()).array(),
[1.0, 0.0, 0.0, 0.0, 0.0]
Expand Down Expand Up @@ -184,31 +184,31 @@ mod tests {

#[test]
fn test_2_tuple_forward() {
let dev: TestDevice = Default::default();
let dev: Cpu = Default::default();
let model: (SetTo1<0, 2>, SetTo1<1, 2>) = Default::default();
let y = model.forward(dev.zeros());
assert_eq!(y.array(), [1.0, 1.0]);
}

#[test]
fn test_3_tuple_forward() {
let dev: TestDevice = Default::default();
let dev: Cpu = Default::default();
let model: (SetTo1<0, 3>, SetTo1<1, 3>, SetTo1<2, 3>) = Default::default();
let y = model.forward(dev.zeros());
assert_eq!(y.array(), [1.0, 1.0, 1.0]);
}

#[test]
fn test_4_tuple_forward() {
let dev: TestDevice = Default::default();
let dev: Cpu = Default::default();
let model: (SetTo1<0, 4>, SetTo1<1, 4>, SetTo1<2, 4>, SetTo1<3, 4>) = Default::default();
let y = model.forward(dev.zeros());
assert_eq!(y.array(), [1.0, 1.0, 1.0, 1.0]);
}

#[test]
fn test_5_tuple_forward() {
let dev: TestDevice = Default::default();
let dev: Cpu = Default::default();
let model: (
SetTo1<0, 5>,
SetTo1<1, 5>,
Expand All @@ -222,7 +222,7 @@ mod tests {

#[test]
fn test_6_tuple_forward() {
let dev: TestDevice = Default::default();
let dev: Cpu = Default::default();
let model: (
SetTo1<0, 6>,
SetTo1<1, 6>,
Expand Down
4 changes: 2 additions & 2 deletions src/nn/npz_impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,8 @@ mod tests {
let x = dev.sample_normal::<Rank3<3, 4, 5>>();
let file = NamedTempFile::new().expect("failed to create tempfile");

let mut saved: BatchNorm2D<3> = dev.build_module();
let mut loaded: BatchNorm2D<3> = dev.build_module();
let mut saved: BatchNorm2D<3, _> = dev.build_module();
let mut loaded: BatchNorm2D<3, _> = dev.build_module();

saved.running_mean.fill_with_distr(Standard);
saved.running_var.fill_with_distr(Standard);
Expand Down
46 changes: 23 additions & 23 deletions src/nn/split_into.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ mod tests {
#[test]
fn test_split_into_2() {
let dev: TestDevice = Default::default();
type Model = SplitInto<(Linear<5, 1>, Linear<5, 2>)>;
let m: Model = dev.build_module();
let m: SplitInto<(Linear<5, 1, _>, Linear<5, 2, _>)> = dev.build_module();
let _: (Tensor<Rank1<1>, _, _>, Tensor<Rank1<2>, _, _, OwnedTape<_>>) =
m.forward(dev.zeros::<Rank1<5>>().traced());
let _: (
Expand All @@ -134,8 +133,7 @@ mod tests {
#[test]
fn test_split_into_3() {
let dev: TestDevice = Default::default();
type Model = SplitInto<(Linear<5, 1>, Linear<5, 2>, Linear<5, 3>)>;
let m: Model = dev.build_module();
let m: SplitInto<(Linear<5, 1, _>, Linear<5, 2, _>, Linear<5, 3, _>)> = dev.build_module();
let _: (
Tensor<Rank1<1>, _, _>,
Tensor<Rank1<2>, _, _>,
Expand All @@ -150,9 +148,13 @@ mod tests {

#[test]
fn test_split_into_4() {
type Model = SplitInto<(Linear<5, 1>, Linear<5, 2>, Linear<5, 3>, Linear<5, 4>)>;
let dev: TestDevice = Default::default();
let m: Model = dev.build_module();
let m: SplitInto<(
Linear<5, 1, _>,
Linear<5, 2, _>,
Linear<5, 3, _>,
Linear<5, 4, _>,
)> = dev.build_module();
let _: (
Tensor<Rank1<1>, _, _>,
Tensor<Rank1<2>, _, _>,
Expand All @@ -169,15 +171,14 @@ mod tests {

#[test]
fn test_split_into_5() {
type Model = SplitInto<(
Linear<5, 1>,
Linear<5, 2>,
Linear<5, 3>,
Linear<5, 4>,
Linear<5, 5>,
)>;
let dev: TestDevice = Default::default();
let m: Model = dev.build_module();
let m: SplitInto<(
Linear<5, 1, _>,
Linear<5, 2, _>,
Linear<5, 3, _>,
Linear<5, 4, _>,
Linear<5, 5, _>,
)> = dev.build_module();
let _: (
Tensor<Rank1<1>, _, _>,
Tensor<Rank1<2>, _, _>,
Expand All @@ -196,16 +197,15 @@ mod tests {

#[test]
fn test_split_into_6() {
type Model = SplitInto<(
Linear<5, 1>,
Linear<5, 2>,
Linear<5, 3>,
Linear<5, 4>,
Linear<5, 5>,
Linear<5, 6>,
)>;
let dev: TestDevice = Default::default();
let m: Model = dev.build_module();
let m: SplitInto<(
Linear<5, 1, _>,
Linear<5, 2, _>,
Linear<5, 3, _>,
Linear<5, 4, _>,
Linear<5, 5, _>,
Linear<5, 6, _>,
)> = dev.build_module();
let _: (
Tensor<Rank1<1>, _, _>,
Tensor<Rank1<2>, _, _>,
Expand Down
14 changes: 14 additions & 0 deletions src/optim/adam/cuda_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use crate::{shapes::Shape, tensor::Cuda};

/// Skeleton CUDA implementation of the Adam optimizer kernel.
///
/// Parameters are underscore-prefixed to suppress `unused_variables`
/// warnings until the kernel body is actually implemented.
impl super::AdamKernel<f32> for Cuda {
    fn update<S: Shape>(
        _t: i32,
        _cfg: &super::AdamConfig<f32>,
        _param: &mut Self::Storage<S, f32>,
        _moment1: &mut Self::Storage<S, f32>,
        _moment2: &mut Self::Storage<S, f32>,
        _grad: Self::Storage<S, f32>,
    ) {
        // TODO: launch a CUDA kernel performing the Adam parameter update.
        todo!()
    }
}
3 changes: 3 additions & 0 deletions src/optim/adam/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
mod cpu_kernel;

#[cfg(feature = "cuda")]
mod cuda_kernel;

use std::marker::PhantomData;

use crate::{
Expand Down
14 changes: 14 additions & 0 deletions src/optim/rmsprop/cuda_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use crate::tensor::Cuda;

/// Skeleton CUDA implementation of the RMSprop optimizer kernel.
///
/// Parameters are underscore-prefixed to suppress `unused_variables`
/// warnings until the kernel body is actually implemented.
impl super::RMSpropKernel<f32> for Cuda {
    fn update<S: crate::shapes::Shape>(
        _cfg: &super::RMSpropConfig<f32>,
        _param: &mut Self::Storage<S, f32>,
        _momentum: &mut Self::Storage<S, f32>,
        _square_avg: &mut Self::Storage<S, f32>,
        _grad_avg: &mut Self::Storage<S, f32>,
        _grad: Self::Storage<S, f32>,
    ) {
        // TODO: launch a CUDA kernel performing the RMSprop parameter update.
        todo!()
    }
}
3 changes: 3 additions & 0 deletions src/optim/rmsprop/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
mod cpu_kernel;

#[cfg(feature = "cuda")]
mod cuda_kernel;

use std::marker::PhantomData;

use crate::{
Expand Down
12 changes: 12 additions & 0 deletions src/optim/sgd/cuda_kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
use crate::{shapes::*, tensor::Cuda};

/// Skeleton CUDA implementation of the SGD optimizer kernel, generic over
/// the unit type `E`.
///
/// Parameters are underscore-prefixed to suppress `unused_variables`
/// warnings until the kernel body is actually implemented.
impl<E: Dtype> super::SgdKernel<E> for Cuda {
    fn update<S: Shape>(
        _cfg: &super::SgdConfig<E>,
        _param: &mut Self::Storage<S, E>,
        _velocity: &mut Self::Storage<S, E>,
        _grad: Self::Storage<S, E>,
    ) {
        // TODO: launch a CUDA kernel performing the SGD parameter update.
        todo!()
    }
}
3 changes: 3 additions & 0 deletions src/optim/sgd/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
mod cpu_kernel;

#[cfg(feature = "cuda")]
mod cuda_kernel;

use std::marker::PhantomData;

use crate::gradients::Gradients;
Expand Down
2 changes: 1 addition & 1 deletion src/shapes/shape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use super::{axes::*, ReduceShapeTo};

/// Represents a unit type, but no arithmetic.
///
/// NOTE(review): the `Unpin` supertrait was added alongside the CUDA device —
/// presumably required so unit values can be held in device buffers; confirm
/// against the cudarc API. `Unpin` is in the prelude, so the bare name is
/// used rather than the `std::marker::` path.
pub trait Unit:
    'static + Copy + Clone + Default + std::fmt::Debug + PartialOrd + Send + Sync + Unpin
{
}
impl Unit for f32 {}
Expand Down
Loading