fixup nll #151

Open · wants to merge 3 commits into base: master
4 changes: 2 additions & 2 deletions coaster/tests/shared_memory_specs.rs
@@ -2,10 +2,10 @@ use coaster as co;
 
 #[cfg(test)]
 mod shared_memory_spec {
-    use super::co::prelude::*;
-    use super::co::tensor::Error;
     #[cfg(features = "cuda")]
     use super::co::frameworks::native::flatbox::FlatBox;
+    use super::co::prelude::*;
+    use super::co::tensor::Error;
 
     #[cfg(features = "cuda")]
     fn write_to_memory<T: Copy>(mem: &mut FlatBox, data: &[T]) {
17 changes: 12 additions & 5 deletions juice/src/layers/loss/negative_log_likelihood.rs
@@ -81,10 +81,13 @@ impl<B: IBackend> ComputeOutput<f32, B> for NegativeLogLikelihood {
         let native_labels = labels.read(native.device()).unwrap().as_slice::<f32>();
         let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
 
-        let mut writable_loss = Vec::<f32>::new();
-        for &label_value in native_labels {
-            let probability_value = native_probabilities[label_value as usize];
-            writable_loss.push(-probability_value);
+        let mut writable_loss = Vec::<f32>::with_capacity(native_labels.len());
+        for (i, &label_value) in native_labels.iter().enumerate() {
+            let index = batch_size * i + label_value as usize;
+            let probability_value = native_probabilities[index];
+            let probability_value2 = probability_value * probability_value;
+            let probability_value3 = probability_value2 * probability_value;
+            writable_loss.push(-probability_value + probability_value2 / 2_f32 - probability_value3 / 3_f32);
         }
 
         let mut loss = writable_loss.iter().fold(0f32, |sum, &val| sum + val);
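Note (not part of the diff): the per-sample term pushed above, -p + p^2/2 - p^3/3, matches the degree-3 Maclaurin expansion of -ln(1 + p); whether that series, rather than -ln(p) itself, is the intended loss is an assumption here. A minimal standalone sketch comparing the truncated series with -ln(1 + p), with a hypothetical helper name:

// Standalone sketch, not part of this PR; `truncated_nll_term` is illustrative only.
fn truncated_nll_term(p: f32) -> f32 {
    let p2 = p * p;
    let p3 = p2 * p;
    -p + p2 / 2_f32 - p3 / 3_f32
}

fn main() {
    // For small p the truncated series tracks -ln(1 + p) closely.
    for &p in &[0.05_f32, 0.25, 0.5] {
        println!("p = {:.2}: series = {:+.5}, -ln(1 + p) = {:+.5}", p, truncated_nll_term(p), -(1_f32 + p).ln());
    }
}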
@@ -105,17 +108,21 @@ impl<B: IBackend> ComputeInputGradient<f32, B> for NegativeLogLikelihood {
         input_data: &[&SharedTensor<f32>],
         input_gradients: &mut [&mut SharedTensor<f32>],
     ) {
+        let probabilities = input_data[0];
         let labels = input_data[1];
         let batch_size = Self::batch_size(input_data[0].desc());
         let num_classes = self.num_classes;
 
         let native = native_backend();
         let native_labels = labels.read(native.device()).unwrap().as_slice::<f32>();
+        let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
         let mut writable_gradient = vec![0f32; input_gradients[0].desc().size()];
 
         for (batch_n, &label_value) in native_labels.iter().enumerate() {
             let index = (num_classes * batch_n) + label_value as usize;
-            writable_gradient[index] = -1f32;
+            let probability_value = native_probabilities[index];
+            let probability_value2 = probability_value * probability_value;
+            writable_gradient[index] = -1_f32 + probability_value - probability_value2;
         }
         crate::util::write_to_memory(
             input_gradients[0].write_only(native.device()).unwrap(),
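Note (not part of the diff): the value written at the label index in the backward pass, -1 + p - p^2, is the derivative with respect to p of the forward-pass term -p + p^2/2 - p^3/3, so the two hunks stay consistent with each other. A standalone finite-difference sketch of that claim, with illustrative names:

// Standalone sketch, not part of this PR; checks d/dp of the truncated term numerically.
fn loss_term(p: f64) -> f64 {
    -p + p * p / 2.0 - p * p * p / 3.0
}

fn main() {
    let p = 0.3_f64;
    let eps = 1e-6_f64;
    // Central difference approximation of the derivative.
    let numeric = (loss_term(p + eps) - loss_term(p - eps)) / (2.0 * eps);
    let analytic = -1.0 + p - p * p;
    println!("numeric = {:.6}, analytic = {:.6}", numeric, analytic);
    assert!((numeric - analytic).abs() < 1e-4);
}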
56 changes: 56 additions & 0 deletions juice/tests/layer_specs.rs
@@ -385,4 +385,60 @@ mod layer_spec {
         )
         .is_err());
     }
+
+    use juice::layers::NegativeLogLikelihoodConfig;
+    use juice::layers::SequentialConfig;
+
+    #[test]
+    fn nll_basic() {
+        const BATCH_SIZE: usize = 7;
+        const KLASS_COUNT: usize = 10;
+        let native_backend = native_backend();
+        let mut classifier_cfg = SequentialConfig::default();
+        classifier_cfg.add_input("network_out", &[BATCH_SIZE, KLASS_COUNT]);
+        classifier_cfg.add_input("label", &[BATCH_SIZE, 1]);
+        // set up nll loss
+        let nll_layer_cfg = NegativeLogLikelihoodConfig { num_classes: 10 };
+        let nll_cfg = LayerConfig::new("nll", nll_layer_cfg);
+        classifier_cfg.add_layer(nll_cfg);
+        let mut network = Layer::from_config(native_backend.clone(), &LayerConfig::new("foo", classifier_cfg));
+        let desc = [BATCH_SIZE, KLASS_COUNT];
+        let desc: &[usize] = &desc[..];
+        let mut input = SharedTensor::<f32>::new(&desc);
+        let mem = input.write_only(native_backend.device()).unwrap();
+        let input_data = (0..(KLASS_COUNT * BATCH_SIZE))
+            .into_iter()
+            .map(|x| x as f32 * 3.77)
+            .collect::<Vec<f32>>();
+        let input_data = &input_data[..];
+        juice::util::write_to_memory(mem, input_data);
+
+        // each input has exactly one label
+        let labels_desc = [BATCH_SIZE, 1];
+        let labels_desc = &labels_desc[..];
+        let mut labels = SharedTensor::<f32>::new(&labels_desc);
+
+        // pretend they have all different classes
+        let labels_data = (1..=(BATCH_SIZE * 1))
+            .into_iter()
+            .map(|x| x as f32)
+            .collect::<Vec<f32>>();
+        let mem = labels.write_only(native_backend.device()).unwrap();
+        juice::util::write_to_memory(mem, labels_data.as_slice());
+
+        let input = vec![
+            std::sync::Arc::new(std::sync::RwLock::new(input)),
+            std::sync::Arc::new(std::sync::RwLock::new(labels)),
+        ];
+
+        let out = network.forward(input.as_slice());
+        assert_eq!(out.len(), 1);
+        let out = &out[0];
+        let out = out.read().unwrap();
+        assert_eq!(out.desc().dims(), &vec![BATCH_SIZE, 1]);
+        let out = out.read(native_backend.device()).unwrap();
+        let out_mem = out.as_slice::<f32>();
+        assert_eq!(out_mem.len(), BATCH_SIZE);
+        assert!(out_mem[0] < 0_f32);
+    }
 }
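To run only the new test locally, something like the following should work, assuming the standard workspace layout with a `juice` package (the exact invocation is an assumption, not taken from the PR):

cargo test -p juice nll_basic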