From e3067083222ff975e71c250b56179951b5c47bc2 Mon Sep 17 00:00:00 2001
From: Bernhard Schuster
Date: Sat, 30 Oct 2021 22:33:34 +0200
Subject: [PATCH 1/3] fix: negative log likelihood missed an offset

Closes #150
---
 .../layers/loss/negative_log_likelihood.rs |  8 ++-
 juice/tests/layer_specs.rs                 | 57 +++++++++++++++++++
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/juice/src/layers/loss/negative_log_likelihood.rs b/juice/src/layers/loss/negative_log_likelihood.rs
index 05be58d10..a56f6c428 100644
--- a/juice/src/layers/loss/negative_log_likelihood.rs
+++ b/juice/src/layers/loss/negative_log_likelihood.rs
@@ -81,10 +81,12 @@ impl<B: IBackend> ComputeOutput<f32, B> for NegativeLogLikelihood {
         let native_labels = labels.read(native.device()).unwrap().as_slice::<f32>();
         let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
 
-        let mut writable_loss = Vec::<f32>::new();
+        let mut writable_loss = Vec::<f32>::with_capacity(native_labels.len());
+        let mut offset = 0;
         for &label_value in native_labels {
-            let probability_value = native_probabilities[label_value as usize];
+            let probability_value = native_probabilities[offset + label_value as usize];
             writable_loss.push(-probability_value);
+            offset += batch_size;
         }
 
         let mut loss = writable_loss.iter().fold(0f32, |sum, &val| sum + val);
@@ -159,4 +161,4 @@ impl Into<LayerType> for NegativeLogLikelihoodConfig {
     fn into(self) -> LayerType {
         LayerType::NegativeLogLikelihood(self)
     }
-}
+}
\ No newline at end of file
diff --git a/juice/tests/layer_specs.rs b/juice/tests/layer_specs.rs
index b05f7a2f5..1f22eb066 100644
--- a/juice/tests/layer_specs.rs
+++ b/juice/tests/layer_specs.rs
@@ -385,4 +385,61 @@ mod layer_spec {
         )
         .is_err());
     }
+
+    use juice::layers::SequentialConfig;
+    use juice::layers::NegativeLogLikelihoodConfig;
+
+    #[test]
+    fn nll_basic() {
+        const BATCH_SIZE: usize = 7;
+        const KLASS_COUNT: usize = 10;
+        let native_backend = native_backend();
+        let mut classifier_cfg = SequentialConfig::default();
+        classifier_cfg.add_input("network_out", &[BATCH_SIZE, KLASS_COUNT]);
+        classifier_cfg.add_input("label", &[BATCH_SIZE, 1]);
+        // set up nll loss
+        let nll_layer_cfg = NegativeLogLikelihoodConfig { num_classes: 10 };
+        let nll_cfg = LayerConfig::new("nll", nll_layer_cfg);
+        classifier_cfg.add_layer(nll_cfg);
+        let mut network = Layer::from_config(
+            native_backend.clone(),
+            &LayerConfig::new("foo", classifier_cfg),
+        );
+        let labels_data = (0..(BATCH_SIZE * KLASS_COUNT))
+            .into_iter()
+            .map(|x| x as f32)
+            .collect::<Vec<f32>>();
+        let desc = [BATCH_SIZE, KLASS_COUNT];
+        let desc: &[usize] = &desc[..];
+        let mut input = SharedTensor::<f32>::new(&desc);
+        let mem = input.write_only(native_backend.device()).unwrap();
+        let input_data = (0..(KLASS_COUNT * BATCH_SIZE)).into_iter().map(|x| x as f32 * 3.77).collect::<Vec<f32>>();
+        let input_data = &input_data[..];
+        juice::util::write_to_memory(mem, input_data);
+
+        // each input has exactly one label
+        let labels_desc = [BATCH_SIZE, 1];
+        let labels_desc = &labels_desc[..];
+        let mut labels = SharedTensor::<f32>::new(&labels_desc);
+
+        // pretend they have all different classes
+        let labels_data = (1..=(BATCH_SIZE * 1))
+            .into_iter()
+            .map(|x| x as f32)
+            .collect::<Vec<f32>>();
+        let mem = labels.write_only(native_backend.device()).unwrap();
+        juice::util::write_to_memory(mem, labels_data.as_slice());
+
+        let input = vec![
+            std::sync::Arc::new(std::sync::RwLock::new(input)),
+            std::sync::Arc::new(std::sync::RwLock::new(labels)),
+        ];
+
+        let output = network.forward(input.as_slice());
+
+        let x = output[0].read().unwrap();
+        dbg!(&x);
+        let out = x.read(native_backend.device()).unwrap();
+        dbg!(out.as_slice::<f32>());
+    }
 }

From 3da2df8222918720985d1af0f861b278588aa811 Mon Sep 17 00:00:00 2001
From: Bernhard Schuster
Date: Sun, 31 Oct 2021 08:37:57 +0100
Subject: [PATCH 2/3] refactor/nll: improve approximation

---
 .../src/layers/loss/negative_log_likelihood.rs | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/juice/src/layers/loss/negative_log_likelihood.rs b/juice/src/layers/loss/negative_log_likelihood.rs
index a56f6c428..de7450d1c 100644
--- a/juice/src/layers/loss/negative_log_likelihood.rs
+++ b/juice/src/layers/loss/negative_log_likelihood.rs
@@ -82,11 +82,12 @@ impl<B: IBackend> ComputeOutput<f32, B> for NegativeLogLikelihood {
         let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
 
         let mut writable_loss = Vec::<f32>::with_capacity(native_labels.len());
-        let mut offset = 0;
-        for &label_value in native_labels {
-            let probability_value = native_probabilities[offset + label_value as usize];
-            writable_loss.push(-probability_value);
-            offset += batch_size;
+        for (i, &label_value) in native_labels.iter().enumerate() {
+            let index = batch_size * i + label_value as usize;
+            let probability_value = native_probabilities[index];
+            let probability_value2 = probability_value * probability_value;
+            let probability_value3 = probability_value2 * probability_value;
+            writable_loss.push(-probability_value + probability_value2 / 2_f32 - probability_value3 / 3_f32);
         }
 
         let mut loss = writable_loss.iter().fold(0f32, |sum, &val| sum + val);
@@ -107,17 +108,21 @@ impl<B: IBackend> ComputeInputGradient<f32, B> for NegativeLogLikelihood {
         input_data: &[&SharedTensor<f32>],
         input_gradients: &mut [&mut SharedTensor<f32>],
     ) {
+        let probabilities = input_data[0];
         let labels = input_data[1];
         let batch_size = Self::batch_size(input_data[0].desc());
        let num_classes = self.num_classes;
 
         let native = native_backend();
         let native_labels = labels.read(native.device()).unwrap().as_slice::<f32>();
+        let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
         let mut writable_gradient = vec![0f32; input_gradients[0].desc().size()];
 
         for (batch_n, &label_value) in native_labels.iter().enumerate() {
             let index = (num_classes * batch_n) + label_value as usize;
-            writable_gradient[index] = -1f32;
+            let probability_value = native_probabilities[index];
+            let probability_value2 = probability_value * probability_value;
+            writable_gradient[index] = -1_f32 + probability_value - probability_value2;
         }
         crate::util::write_to_memory(
             input_gradients[0].write_only(native.device()).unwrap(),

From 290c0b7d407bf51cdbd2284c71f9d8c68539d93e Mon Sep 17 00:00:00 2001
From: Bernhard Schuster
Date: Sun, 31 Oct 2021 12:00:43 +0100
Subject: [PATCH 3/3] chore/layer/nll: fmt, cleanup, asserts

---
 coaster/tests/shared_memory_specs.rs       |  4 +--
 .../layers/loss/negative_log_likelihood.rs |  2 +-
 juice/tests/layer_specs.rs                 | 31 +++++++++----------
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/coaster/tests/shared_memory_specs.rs b/coaster/tests/shared_memory_specs.rs
index c75827187..48885378e 100644
--- a/coaster/tests/shared_memory_specs.rs
+++ b/coaster/tests/shared_memory_specs.rs
@@ -2,10 +2,10 @@ use coaster as co;
 
 #[cfg(test)]
 mod shared_memory_spec {
-    use super::co::prelude::*;
-    use super::co::tensor::Error;
     #[cfg(features = "cuda")]
     use super::co::frameworks::native::flatbox::FlatBox;
+    use super::co::prelude::*;
+    use super::co::tensor::Error;
 
     #[cfg(features = "cuda")]
     fn write_to_memory<T: Copy>(mem: &mut FlatBox, data: &[T]) {
diff --git a/juice/src/layers/loss/negative_log_likelihood.rs b/juice/src/layers/loss/negative_log_likelihood.rs
index de7450d1c..ce08bd3b8 100644
--- a/juice/src/layers/loss/negative_log_likelihood.rs
+++ b/juice/src/layers/loss/negative_log_likelihood.rs
@@ -166,4 +166,4 @@ impl Into<LayerType> for NegativeLogLikelihoodConfig {
     fn into(self) -> LayerType {
         LayerType::NegativeLogLikelihood(self)
     }
-}
\ No newline at end of file
+}
diff --git a/juice/tests/layer_specs.rs b/juice/tests/layer_specs.rs
index 1f22eb066..a7f7147d7 100644
--- a/juice/tests/layer_specs.rs
+++ b/juice/tests/layer_specs.rs
@@ -386,8 +386,8 @@ mod layer_spec {
         .is_err());
     }
 
-    use juice::layers::SequentialConfig;
     use juice::layers::NegativeLogLikelihoodConfig;
+    use juice::layers::SequentialConfig;
 
     #[test]
     fn nll_basic() {
@@ -401,19 +401,15 @@
         let nll_layer_cfg = NegativeLogLikelihoodConfig { num_classes: 10 };
         let nll_cfg = LayerConfig::new("nll", nll_layer_cfg);
         classifier_cfg.add_layer(nll_cfg);
-        let mut network = Layer::from_config(
-            native_backend.clone(),
-            &LayerConfig::new("foo", classifier_cfg),
-        );
-        let labels_data = (0..(BATCH_SIZE * KLASS_COUNT))
-            .into_iter()
-            .map(|x| x as f32)
-            .collect::<Vec<f32>>();
+        let mut network = Layer::from_config(native_backend.clone(), &LayerConfig::new("foo", classifier_cfg));
         let desc = [BATCH_SIZE, KLASS_COUNT];
         let desc: &[usize] = &desc[..];
         let mut input = SharedTensor::<f32>::new(&desc);
         let mem = input.write_only(native_backend.device()).unwrap();
-        let input_data = (0..(KLASS_COUNT * BATCH_SIZE)).into_iter().map(|x| x as f32 * 3.77).collect::<Vec<f32>>();
+        let input_data = (0..(KLASS_COUNT * BATCH_SIZE))
+            .into_iter()
+            .map(|x| x as f32 * 3.77)
+            .collect::<Vec<f32>>();
         let input_data = &input_data[..];
         juice::util::write_to_memory(mem, input_data);
 
@@ -435,11 +431,14 @@
             std::sync::Arc::new(std::sync::RwLock::new(labels)),
         ];
 
-        let output = network.forward(input.as_slice());
-
-        let x = output[0].read().unwrap();
-        dbg!(&x);
-        let out = x.read(native_backend.device()).unwrap();
-        dbg!(out.as_slice::<f32>());
+        let out = network.forward(input.as_slice());
+        assert_eq!(out.len(), 1);
+        let out = &out[0];
+        let out = out.read().unwrap();
+        assert_eq!(out.desc().dims(), &vec![BATCH_SIZE, 1]);
+        let out = out.read(native_backend.device()).unwrap();
+        let out_mem = out.as_slice::<f32>();
+        assert_eq!(out_mem.len(), BATCH_SIZE);
+        assert!(out_mem[0] < 0_f32);
     }
 }
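
For readers following the series, the indexing error fixed in PATCH 1/3 is easiest to see outside the layer plumbing. The sketch below is a standalone illustration, not code from the juice crate: it assumes a row-major [batch_size, num_classes] probability matrix, uses the textbook -ln(p) loss term, and strides by num_classes, whereas the patched layer accumulates -probability_value (and, after PATCH 2/3, a truncated series). All names and numbers are made up; only the per-row offset arithmetic carries over.

    // Standalone sketch (hypothetical, not juice crate code): negative log
    // likelihood over a flattened, row-major [batch_size, num_classes] matrix.
    fn negative_log_likelihood(probabilities: &[f32], labels: &[usize], num_classes: usize) -> f32 {
        assert_eq!(probabilities.len(), labels.len() * num_classes);
        let mut loss = 0f32;
        for (i, &label) in labels.iter().enumerate() {
            // Sample i occupies probabilities[i * num_classes..(i + 1) * num_classes].
            // Without this per-row offset every sample would index into row 0,
            // which is the bug the first commit addresses.
            let p = probabilities[i * num_classes + label];
            loss += -p.ln();
        }
        loss / labels.len() as f32
    }

    fn main() {
        // Two samples, three classes each; each label picks that sample's most likely class.
        let probabilities = [0.7, 0.2, 0.1, 0.1, 0.8, 0.1];
        let labels = [0usize, 1];
        println!("{}", negative_log_likelihood(&probabilities, &labels, 3));
    }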