From e3067083222ff975e71c250b56179951b5c47bc2 Mon Sep 17 00:00:00 2001
From: Bernhard Schuster
Date: Sat, 30 Oct 2021 22:33:34 +0200
Subject: [PATCH 1/3] fix: negative log likelihood missed an offset

Closes #150
---
 .../layers/loss/negative_log_likelihood.rs |  8 ++-
 juice/tests/layer_specs.rs                 | 57 +++++++++++++++++++
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/juice/src/layers/loss/negative_log_likelihood.rs b/juice/src/layers/loss/negative_log_likelihood.rs
index 05be58d10..a56f6c428 100644
--- a/juice/src/layers/loss/negative_log_likelihood.rs
+++ b/juice/src/layers/loss/negative_log_likelihood.rs
@@ -81,10 +81,12 @@ impl<B: IBackend> ComputeOutput<f32, B> for NegativeLogLikelihood {
         let native_labels = labels.read(native.device()).unwrap().as_slice::<f32>();
         let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
 
-        let mut writable_loss = Vec::<f32>::new();
+        let mut writable_loss = Vec::<f32>::with_capacity(native_labels.len());
+        let mut offset = 0;
         for &label_value in native_labels {
-            let probability_value = native_probabilities[label_value as usize];
+            let probability_value = native_probabilities[offset + label_value as usize];
             writable_loss.push(-probability_value);
+            offset += batch_size;
         }
 
         let mut loss = writable_loss.iter().fold(0f32, |sum, &val| sum + val);
@@ -159,4 +161,4 @@ impl Into<LayerType> for NegativeLogLikelihoodConfig {
     fn into(self) -> LayerType {
         LayerType::NegativeLogLikelihood(self)
     }
-}
+}
\ No newline at end of file
diff --git a/juice/tests/layer_specs.rs b/juice/tests/layer_specs.rs
index b05f7a2f5..1f22eb066 100644
--- a/juice/tests/layer_specs.rs
+++ b/juice/tests/layer_specs.rs
@@ -385,4 +385,61 @@ mod layer_spec {
         )
         .is_err());
     }
+
+    use juice::layers::SequentialConfig;
+    use juice::layers::NegativeLogLikelihoodConfig;
+
+    #[test]
+    fn nll_basic() {
+        const BATCH_SIZE: usize = 7;
+        const KLASS_COUNT: usize = 10;
+        let native_backend = native_backend();
+        let mut classifier_cfg = SequentialConfig::default();
+        classifier_cfg.add_input("network_out", &[BATCH_SIZE, KLASS_COUNT]);
+        classifier_cfg.add_input("label", &[BATCH_SIZE, 1]);
+        // set up nll loss
+        let nll_layer_cfg = NegativeLogLikelihoodConfig { num_classes: 10 };
+        let nll_cfg = LayerConfig::new("nll", nll_layer_cfg);
+        classifier_cfg.add_layer(nll_cfg);
+        let mut network = Layer::from_config(
+            native_backend.clone(),
+            &LayerConfig::new("foo", classifier_cfg),
+        );
+        let labels_data = (0..(BATCH_SIZE * KLASS_COUNT))
+            .into_iter()
+            .map(|x| x as f32)
+            .collect::<Vec<f32>>();
+        let desc = [BATCH_SIZE, KLASS_COUNT];
+        let desc: &[usize] = &desc[..];
+        let mut input = SharedTensor::<f32>::new(&desc);
+        let mem = input.write_only(native_backend.device()).unwrap();
+        let input_data = (0..(KLASS_COUNT * BATCH_SIZE)).into_iter().map(|x| x as f32 * 3.77).collect::<Vec<f32>>();
+        let input_data = &input_data[..];
+        juice::util::write_to_memory(mem, input_data);
+
+        // each input has exactly one label
+        let labels_desc = [BATCH_SIZE, 1];
+        let labels_desc = &labels_desc[..];
+        let mut labels = SharedTensor::<f32>::new(&labels_desc);
+
+        // pretend they have all different classes
+        let labels_data = (1..=(BATCH_SIZE * 1))
+            .into_iter()
+            .map(|x| x as f32)
+            .collect::<Vec<f32>>();
+        let mem = labels.write_only(native_backend.device()).unwrap();
+        juice::util::write_to_memory(mem, labels_data.as_slice());
+
+        let input = vec![
+            std::sync::Arc::new(std::sync::RwLock::new(input)),
+            std::sync::Arc::new(std::sync::RwLock::new(labels)),
+        ];
+
+        let output = network.forward(input.as_slice());
+
+        let x = output[0].read().unwrap();
+        dbg!(&x);
+        let out = x.read(native_backend.device()).unwrap();
+        dbg!(out.as_slice::<f32>());
+    }
 }

From 3da2df8222918720985d1af0f861b278588aa811 Mon Sep 17 00:00:00 2001
From: Bernhard Schuster
Date: Sun, 31 Oct 2021 08:37:57 +0100
Subject: [PATCH 2/3] refactor/nll: improve approximation

---
 .../src/layers/loss/negative_log_likelihood.rs | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/juice/src/layers/loss/negative_log_likelihood.rs b/juice/src/layers/loss/negative_log_likelihood.rs
index a56f6c428..de7450d1c 100644
--- a/juice/src/layers/loss/negative_log_likelihood.rs
+++ b/juice/src/layers/loss/negative_log_likelihood.rs
@@ -82,11 +82,12 @@ impl<B: IBackend> ComputeOutput<f32, B> for NegativeLogLikelihood {
         let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
 
         let mut writable_loss = Vec::<f32>::with_capacity(native_labels.len());
-        let mut offset = 0;
-        for &label_value in native_labels {
-            let probability_value = native_probabilities[offset + label_value as usize];
-            writable_loss.push(-probability_value);
-            offset += batch_size;
+        for (i, &label_value) in native_labels.iter().enumerate() {
+            let index = batch_size * i + label_value as usize;
+            let probability_value = native_probabilities[index];
+            let probability_value2 = probability_value * probability_value;
+            let probability_value3 = probability_value2 * probability_value;
+            writable_loss.push(-probability_value + probability_value2 / 2_f32 - probability_value3 / 3_f32);
         }
 
         let mut loss = writable_loss.iter().fold(0f32, |sum, &val| sum + val);
@@ -107,17 +108,21 @@ impl<B: IBackend> ComputeInputGradient<f32, B> for NegativeLogLikelihood {
         input_data: &[&SharedTensor<f32>],
         input_gradients: &mut [&mut SharedTensor<f32>],
     ) {
+        let probabilities = input_data[0];
         let labels = input_data[1];
         let batch_size = Self::batch_size(input_data[0].desc());
        let num_classes = self.num_classes;
 
         let native = native_backend();
         let native_labels = labels.read(native.device()).unwrap().as_slice::<f32>();
+        let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
         let mut writable_gradient = vec![0f32; input_gradients[0].desc().size()];
 
         for (batch_n, &label_value) in native_labels.iter().enumerate() {
             let index = (num_classes * batch_n) + label_value as usize;
-            writable_gradient[index] = -1f32;
+            let probability_value = native_probabilities[index];
+            let probability_value2 = probability_value * probability_value;
+            writable_gradient[index] = -1_f32 + probability_value - probability_value2;
         }
         crate::util::write_to_memory(
             input_gradients[0].write_only(native.device()).unwrap(),

From 290c0b7d407bf51cdbd2284c71f9d8c68539d93e Mon Sep 17 00:00:00 2001
From: Bernhard Schuster
Date: Sun, 31 Oct 2021 12:00:43 +0100
Subject: [PATCH 3/3] chore/layer/nll: fmt, cleanup, asserts

---
 coaster/tests/shared_memory_specs.rs       |  4 +--
 .../layers/loss/negative_log_likelihood.rs |  2 +-
 juice/tests/layer_specs.rs                 | 31 +++++++++----------
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/coaster/tests/shared_memory_specs.rs b/coaster/tests/shared_memory_specs.rs
index c75827187..48885378e 100644
--- a/coaster/tests/shared_memory_specs.rs
+++ b/coaster/tests/shared_memory_specs.rs
@@ -2,10 +2,10 @@ use coaster as co;
 
 #[cfg(test)]
 mod shared_memory_spec {
-    use super::co::prelude::*;
-    use super::co::tensor::Error;
     #[cfg(features = "cuda")]
     use super::co::frameworks::native::flatbox::FlatBox;
+    use super::co::prelude::*;
+    use super::co::tensor::Error;
 
     #[cfg(features = "cuda")]
     fn write_to_memory<T: Copy>(mem: &mut FlatBox, data: &[T]) {
diff --git a/juice/src/layers/loss/negative_log_likelihood.rs b/juice/src/layers/loss/negative_log_likelihood.rs
index de7450d1c..ce08bd3b8 100644
--- a/juice/src/layers/loss/negative_log_likelihood.rs
+++ b/juice/src/layers/loss/negative_log_likelihood.rs
@@ -166,4 +166,4 @@ impl Into<LayerType> for NegativeLogLikelihoodConfig {
     fn into(self) -> LayerType {
         LayerType::NegativeLogLikelihood(self)
     }
-}
\ No newline at end of file
+}
diff --git a/juice/tests/layer_specs.rs b/juice/tests/layer_specs.rs
index 1f22eb066..a7f7147d7 100644
--- a/juice/tests/layer_specs.rs
+++ b/juice/tests/layer_specs.rs
@@ -386,8 +386,8 @@ mod layer_spec {
         .is_err());
     }
 
-    use juice::layers::SequentialConfig;
     use juice::layers::NegativeLogLikelihoodConfig;
+    use juice::layers::SequentialConfig;
 
     #[test]
     fn nll_basic() {
@@ -401,19 +401,15 @@
         let nll_layer_cfg = NegativeLogLikelihoodConfig { num_classes: 10 };
         let nll_cfg = LayerConfig::new("nll", nll_layer_cfg);
         classifier_cfg.add_layer(nll_cfg);
-        let mut network = Layer::from_config(
-            native_backend.clone(),
-            &LayerConfig::new("foo", classifier_cfg),
-        );
-        let labels_data = (0..(BATCH_SIZE * KLASS_COUNT))
-            .into_iter()
-            .map(|x| x as f32)
-            .collect::<Vec<f32>>();
+        let mut network = Layer::from_config(native_backend.clone(), &LayerConfig::new("foo", classifier_cfg));
         let desc = [BATCH_SIZE, KLASS_COUNT];
         let desc: &[usize] = &desc[..];
         let mut input = SharedTensor::<f32>::new(&desc);
         let mem = input.write_only(native_backend.device()).unwrap();
-        let input_data = (0..(KLASS_COUNT * BATCH_SIZE)).into_iter().map(|x| x as f32 * 3.77).collect::<Vec<f32>>();
+        let input_data = (0..(KLASS_COUNT * BATCH_SIZE))
+            .into_iter()
+            .map(|x| x as f32 * 3.77)
+            .collect::<Vec<f32>>();
         let input_data = &input_data[..];
         juice::util::write_to_memory(mem, input_data);
 
@@ -435,11 +431,14 @@
             std::sync::Arc::new(std::sync::RwLock::new(labels)),
         ];
 
-        let output = network.forward(input.as_slice());
-
-        let x = output[0].read().unwrap();
-        dbg!(&x);
-        let out = x.read(native_backend.device()).unwrap();
-        dbg!(out.as_slice::<f32>());
+        let out = network.forward(input.as_slice());
+        assert_eq!(out.len(), 1);
+        let out = &out[0];
+        let out = out.read().unwrap();
+        assert_eq!(out.desc().dims(), &vec![BATCH_SIZE, 1]);
+        let out = out.read(native_backend.device()).unwrap();
+        let out_mem = out.as_slice::<f32>();
+        assert_eq!(out_mem.len(), BATCH_SIZE);
+        assert!(out_mem[0] < 0_f32);
     }
 }
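
For readers following the series, the indexing error fixed in PATCH 1/3 is easiest to see outside the layer plumbing. The sketch below is a standalone illustration, not code from the juice crate: it assumes a row-major [batch_size, num_classes] probability matrix, uses the textbook -ln(p) loss term, and strides by num_classes, whereas the patched layer accumulates -probability_value (and, after PATCH 2/3, a truncated series). All names and numbers are made up; only the per-row offset arithmetic carries over.

    // Standalone sketch (hypothetical, not juice crate code): negative log
    // likelihood over a flattened, row-major [batch_size, num_classes] matrix.
    fn negative_log_likelihood(probabilities: &[f32], labels: &[usize], num_classes: usize) -> f32 {
        assert_eq!(probabilities.len(), labels.len() * num_classes);
        let mut loss = 0f32;
        for (i, &label) in labels.iter().enumerate() {
            // Sample i occupies probabilities[i * num_classes..(i + 1) * num_classes].
            // Without this per-row offset every sample would index into row 0,
            // which is the bug the first commit addresses.
            let p = probabilities[i * num_classes + label];
            loss += -p.ln();
        }
        loss / labels.len() as f32
    }

    fn main() {
        // Two samples, three classes each; each label picks that sample's most likely class.
        let probabilities = [0.7, 0.2, 0.1, 0.1, 0.8, 0.1];
        let labels = [0usize, 1];
        println!("{}", negative_log_likelihood(&probabilities, &labels, 3));
    }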