fixup nll #151

Open · wants to merge 3 commits into base: master
4 changes: 2 additions & 2 deletions coaster/tests/shared_memory_specs.rs
@@ -2,10 +2,10 @@ use coaster as co;
 
 #[cfg(test)]
 mod shared_memory_spec {
-    use super::co::prelude::*;
-    use super::co::tensor::Error;
     #[cfg(features = "cuda")]
     use super::co::frameworks::native::flatbox::FlatBox;
+    use super::co::prelude::*;
+    use super::co::tensor::Error;
 
     #[cfg(features = "cuda")]
     fn write_to_memory<T: Copy>(mem: &mut FlatBox, data: &[T]) {
17 changes: 12 additions & 5 deletions juice/src/layers/loss/negative_log_likelihood.rs
@@ -81,10 +81,13 @@ impl<B: IBackend> ComputeOutput<f32, B> for NegativeLogLikelihood {
         let native_labels = labels.read(native.device()).unwrap().as_slice::<f32>();
         let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
 
-        let mut writable_loss = Vec::<f32>::new();
-        for &label_value in native_labels {
-            let probability_value = native_probabilities[label_value as usize];
-            writable_loss.push(-probability_value);
+        let mut writable_loss = Vec::<f32>::with_capacity(native_labels.len());
+        for (i, &label_value) in native_labels.iter().enumerate() {
+            let index = batch_size * i + label_value as usize;
+            let probability_value = native_probabilities[index];
+            let probability_value2 = probability_value * probability_value;
+            let probability_value3 = probability_value2 * probability_value;
+            writable_loss.push(-probability_value + probability_value2 / 2_f32 - probability_value3 / 3_f32);
         }
 
         let mut loss = writable_loss.iter().fold(0f32, |sum, &val| sum + val);
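Note (not part of the diff): the per-sample term pushed above, -p + p^2/2 - p^3/3, matches the degree-3 Maclaurin expansion of -ln(1 + p); whether that series, rather than -ln(p) itself, is the intended loss is an assumption here. A minimal standalone sketch comparing the truncated series with -ln(1 + p), with a hypothetical helper name:

// Standalone sketch, not part of this PR; `truncated_nll_term` is illustrative only.
fn truncated_nll_term(p: f32) -> f32 {
    let p2 = p * p;
    let p3 = p2 * p;
    -p + p2 / 2_f32 - p3 / 3_f32
}

fn main() {
    // For small p the truncated series tracks -ln(1 + p) closely.
    for &p in &[0.05_f32, 0.25, 0.5] {
        println!("p = {:.2}: series = {:+.5}, -ln(1 + p) = {:+.5}", p, truncated_nll_term(p), -(1_f32 + p).ln());
    }
}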
@@ -105,17 +108,21 @@ impl<B: IBackend> ComputeInputGradient<f32, B> for NegativeLogLikelihood {
         input_data: &[&SharedTensor<f32>],
         input_gradients: &mut [&mut SharedTensor<f32>],
     ) {
+        let probabilities = input_data[0];
         let labels = input_data[1];
         let batch_size = Self::batch_size(input_data[0].desc());
         let num_classes = self.num_classes;
 
         let native = native_backend();
         let native_labels = labels.read(native.device()).unwrap().as_slice::<f32>();
+        let native_probabilities = probabilities.read(native.device()).unwrap().as_slice::<f32>();
         let mut writable_gradient = vec![0f32; input_gradients[0].desc().size()];
 
         for (batch_n, &label_value) in native_labels.iter().enumerate() {
             let index = (num_classes * batch_n) + label_value as usize;
-            writable_gradient[index] = -1f32;
+            let probability_value = native_probabilities[index];
+            let probability_value2 = probability_value * probability_value;
+            writable_gradient[index] = -1_f32 + probability_value - probability_value2;
         }
         crate::util::write_to_memory(
             input_gradients[0].write_only(native.device()).unwrap(),
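Note (not part of the diff): the value written at the label index in the backward pass, -1 + p - p^2, is the derivative with respect to p of the forward-pass term -p + p^2/2 - p^3/3, so the two hunks stay consistent with each other. A standalone finite-difference sketch of that claim, with illustrative names:

// Standalone sketch, not part of this PR; checks d/dp of the truncated term numerically.
fn loss_term(p: f64) -> f64 {
    -p + p * p / 2.0 - p * p * p / 3.0
}

fn main() {
    let p = 0.3_f64;
    let eps = 1e-6_f64;
    // Central difference approximation of the derivative.
    let numeric = (loss_term(p + eps) - loss_term(p - eps)) / (2.0 * eps);
    let analytic = -1.0 + p - p * p;
    println!("numeric = {:.6}, analytic = {:.6}", numeric, analytic);
    assert!((numeric - analytic).abs() < 1e-4);
}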
56 changes: 56 additions & 0 deletions juice/tests/layer_specs.rs
@@ -385,4 +385,60 @@ mod layer_spec {
         )
         .is_err());
     }
+
+    use juice::layers::NegativeLogLikelihoodConfig;
+    use juice::layers::SequentialConfig;
+
+    #[test]
+    fn nll_basic() {
+        const BATCH_SIZE: usize = 7;
+        const KLASS_COUNT: usize = 10;
+        let native_backend = native_backend();
+        let mut classifier_cfg = SequentialConfig::default();
+        classifier_cfg.add_input("network_out", &[BATCH_SIZE, KLASS_COUNT]);
+        classifier_cfg.add_input("label", &[BATCH_SIZE, 1]);
+        // set up nll loss
+        let nll_layer_cfg = NegativeLogLikelihoodConfig { num_classes: 10 };
+        let nll_cfg = LayerConfig::new("nll", nll_layer_cfg);
+        classifier_cfg.add_layer(nll_cfg);
+        let mut network = Layer::from_config(native_backend.clone(), &LayerConfig::new("foo", classifier_cfg));
+        let desc = [BATCH_SIZE, KLASS_COUNT];
+        let desc: &[usize] = &desc[..];
+        let mut input = SharedTensor::<f32>::new(&desc);
+        let mem = input.write_only(native_backend.device()).unwrap();
+        let input_data = (0..(KLASS_COUNT * BATCH_SIZE))
+            .into_iter()
+            .map(|x| x as f32 * 3.77)
+            .collect::<Vec<f32>>();
+        let input_data = &input_data[..];
+        juice::util::write_to_memory(mem, input_data);
+
+        // each input has exactly one label
+        let labels_desc = [BATCH_SIZE, 1];
+        let labels_desc = &labels_desc[..];
+        let mut labels = SharedTensor::<f32>::new(&labels_desc);
+
+        // pretend they have all different classes
+        let labels_data = (1..=(BATCH_SIZE * 1))
+            .into_iter()
+            .map(|x| x as f32)
+            .collect::<Vec<f32>>();
+        let mem = labels.write_only(native_backend.device()).unwrap();
+        juice::util::write_to_memory(mem, labels_data.as_slice());
+
+        let input = vec![
+            std::sync::Arc::new(std::sync::RwLock::new(input)),
+            std::sync::Arc::new(std::sync::RwLock::new(labels)),
+        ];
+
+        let out = network.forward(input.as_slice());
+        assert_eq!(out.len(), 1);
+        let out = &out[0];
+        let out = out.read().unwrap();
+        assert_eq!(out.desc().dims(), &vec![BATCH_SIZE, 1]);
+        let out = out.read(native_backend.device()).unwrap();
+        let out_mem = out.as_slice::<f32>();
+        assert_eq!(out_mem.len(), BATCH_SIZE);
+        assert!(out_mem[0] < 0_f32);
+    }
 }
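To run only the new test locally, something like the following should work, assuming the standard workspace layout with a `juice` package (the exact invocation is an assumption, not taken from the PR):

cargo test -p juice nll_basic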