diff --git a/Cargo.lock b/Cargo.lock index 1ee6104ba..300234f7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -954,6 +954,7 @@ dependencies = [ name = "juice" version = "0.2.5" dependencies = [ + "anyhow", "capnp", "capnpc", "coaster", diff --git a/juice-examples/mackey-glass-rnn-regression/src/main.rs b/juice-examples/mackey-glass-rnn-regression/src/main.rs index 3238027a3..6ae5074f8 100644 --- a/juice-examples/mackey-glass-rnn-regression/src/main.rs +++ b/juice-examples/mackey-glass-rnn-regression/src/main.rs @@ -104,7 +104,7 @@ fn create_network(batch_size: usize, columns: usize) -> SequentialConfig { // Reshape the input into NCHW Format net_cfg.add_layer(LayerConfig::new( "reshape", - LayerType::Reshape(ReshapeConfig::of_shape(&[batch_size, DATA_COLUMNS, 1, 1])), + LayerType::Reshape(ReshapeConfig::of_shape(&[-1, DATA_COLUMNS as isize, 1, 1])), )); net_cfg.add_layer(LayerConfig::new( @@ -168,7 +168,6 @@ pub(crate) fn train( // Initialise a Sequential Layer let net_cfg = create_network(batch_size, DATA_COLUMNS); let mut solver = add_solver(backend, net_cfg, batch_size, learning_rate, momentum); - // Define Input & Labels let input = SharedTensor::<f32>::new(&[batch_size, 1, DATA_COLUMNS]); let input_lock = Arc::new(RwLock::new(input)); @@ -224,7 +223,8 @@ pub(crate) fn test(backend: Rc<Backend<Cuda>>, batch_size: usize, file: &Path) -> Result<(), Box<dyn std::error::Error>> { // Load in a pre-trained network let mut network: Layer<Backend<Cuda>> = Layer::<Backend<Cuda>>::load(backend, file)?; - + dbg!(&network); + panic!("End"); // Define Input & Labels let input = SharedTensor::<f32>::new(&[batch_size, 1, DATA_COLUMNS]); let input_lock = Arc::new(RwLock::new(input)); @@ -262,7 +262,7 @@ pub(crate) fn test(backend: Rc<Backend<Cuda>>, batch_size: usize, file: &Path) - } fn main() { - env_logger::builder().filter_level(log::LevelFilter::Info).init(); + env_logger::builder().filter_level(log::LevelFilter::Trace).init(); // Parse Arguments let args: Args = docopt::Docopt::new(MAIN_USAGE) .and_then(|d| d.deserialize()) diff --git 
a/juice-examples/mnist-image-multiclass-classification/src/main.rs b/juice-examples/mnist-image-multiclass-classification/src/main.rs index d306d318c..7a4f3fffa 100644 --- a/juice-examples/mnist-image-multiclass-classification/src/main.rs +++ b/juice-examples/mnist-image-multiclass-classification/src/main.rs @@ -140,7 +140,7 @@ fn add_conv_net( ) -> SequentialConfig { net_cfg.add_layer(LayerConfig::new( "reshape", - ReshapeConfig::of_shape(&[batch_size, 1, pixel_dim, pixel_dim]), + ReshapeConfig::of_shape(&[batch_size as isize, 1, pixel_dim as isize, pixel_dim as isize]), )); net_cfg.add_layer(LayerConfig::new( "conv", @@ -192,7 +192,7 @@ fn add_mlp( ) -> SequentialConfig { net_cfg.add_layer(LayerConfig::new( "reshape", - LayerType::Reshape(ReshapeConfig::of_shape(&[batch_size, pixel_count])), + LayerType::Reshape(ReshapeConfig::of_shape(&[batch_size as isize, pixel_count as isize])), )); net_cfg.add_layer(LayerConfig::new( "linear1", diff --git a/juice/Cargo.toml b/juice/Cargo.toml index 4c2e7ca5d..6a8001502 100644 --- a/juice/Cargo.toml +++ b/juice/Cargo.toml @@ -28,6 +28,7 @@ rand = "0.8" num = "0.4" capnp = "0.14" timeit = "0.1" +anyhow = "1.0" [build-dependencies] capnpc = "0.14" diff --git a/juice/capnp/juice.capnp b/juice/capnp/juice.capnp index 315046b5a..43b3d9154 100644 --- a/juice/capnp/juice.capnp +++ b/juice/capnp/juice.capnp @@ -108,5 +108,5 @@ struct NegativeLogLikelihoodConfig { } struct ReshapeConfig { - shape @0 :List(UInt64); + shape @0 :List(Int64); } diff --git a/juice/src/layers/utility/reshape.rs b/juice/src/layers/utility/reshape.rs index 2d1a699e6..f75474359 100644 --- a/juice/src/layers/utility/reshape.rs +++ b/juice/src/layers/utility/reshape.rs @@ -17,15 +17,16 @@ //! 
- `W` : width use crate::capnp_util::*; -use crate::co::{IBackend, SharedTensor}; +use crate::co::{IBackend, SharedTensor, TensorDesc}; use crate::juice_capnp::reshape_config as capnp_config; use crate::layer::*; use crate::util::ArcLock; +use anyhow::{anyhow, Result}; #[derive(Debug, Clone)] /// Reshape Utility Layer pub struct Reshape { - shape: Vec<usize>, + shape: Vec<isize>, } impl Reshape { @@ -35,6 +36,38 @@ impl Reshape { shape: config.shape.clone(), } } + + fn evaluate_shape(&self, input_shape: &TensorDesc) -> Result<Vec<usize>> { + dbg!(&self.shape); + dbg!(input_shape); + let unknown_dimensions: usize = self.shape.iter().filter(|x| **x == -1).count(); + let invalid_dimensions: usize = self.shape.iter().filter(|x| **x < -1).count(); + if invalid_dimensions > 0 { + return Err(anyhow!("Invalid elements provided to Reshape")) + } + return match unknown_dimensions { + 0 => Ok(self.shape.clone().into_iter().map(|x| x as usize).collect()), + 1 => { + let total_prior_elements: usize = input_shape.iter().product(); + let known_elements: usize = self.shape.iter().filter(|x| **x > -1).product::<isize>() as usize; + dbg!(total_prior_elements); + dbg!(known_elements); + if total_prior_elements != (total_prior_elements / known_elements * known_elements) { + Err(anyhow!( + "Dimensions {:?} do not cleanly reshape into {:?}", + input_shape, self.shape + )) + } else { + let unknown_element: usize = total_prior_elements / known_elements; + Ok(self.shape + .iter() + .map(|x| if *x == -1 { unknown_element } else { *x as usize }) + .collect()) + } + } + _ => Err(anyhow!("More than 2 unknown elements provided to Reshape")), + } + } } impl<B: IBackend> ILayer<B> for Reshape { @@ -56,8 +89,19 @@ output_data: &mut Vec<ArcLock<SharedTensor<f32>>>, output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>, ) { - output_data[0].write().unwrap().resize(&self.shape).unwrap(); - output_gradient[0].write().unwrap().resize(&self.shape).unwrap(); + // Shape Evaluation has to be done at run-time. 
+ if !input_data.is_empty() { + let output_shape : Vec<usize> = match input_data[0].read() { + Ok(tensor) => self.evaluate_shape(tensor.desc()).unwrap(), + Err(E) => panic!("") + }; + output_data[0].write().unwrap().resize(&output_shape).unwrap(); + let output_grad_shape : Vec<usize> = match input_gradient[0].read() { + Ok(tensor) => self.evaluate_shape(tensor.desc()).unwrap(), + Err(E) => panic!("") + }; + output_gradient[0].write().unwrap().resize(&output_grad_shape).unwrap(); + } } } @@ -92,15 +136,20 @@ impl<B: IBackend> ComputeParametersGradient<f32, B> for Reshape {} pub struct ReshapeConfig { /// The target shape that the input should assume. /// - /// Preceding dimensions are treated as independent inputs + /// Preceding dimensions are treated as independent inputs. At most one value can be -1, + /// indicating that the size of that element should be the remaining element dimensions, i.e. + /// Input [2,8] -> Reshape [-1, 4] -> Output [4, 4] + /// As the input has 16 elements, 16 / 4 is 4, so the output is [4, 4] + /// + /// Causes an error if the total elements are incompatible with the dimensions selected. /// /// Defaults to `1` - pub shape: Vec<usize>, + pub shape: Vec<isize>, } impl ReshapeConfig { /// Create a ReshapeConfig that describes a Reshape layer with a provided shape. 
- pub fn of_shape(shape: &[usize]) -> ReshapeConfig { + pub fn of_shape(shape: &[isize]) -> ReshapeConfig { ReshapeConfig { shape: shape.to_owned(), } } @@ -114,7 +163,7 @@ impl<'a> CapnpWrite<'a> for ReshapeConfig { fn write_capnp(&self, builder: &mut Self::Builder) { let mut shape = builder.reborrow().init_shape(self.shape.len() as u32); for (i, dim) in self.shape.iter().enumerate() { - shape.set(i as u32, *dim as u64); + shape.set(i as u32, *dim as i64); } } } @@ -124,9 +173,9 @@ impl<'a> CapnpRead<'a> for ReshapeConfig { fn read_capnp(reader: Self::Reader) -> Self { let read_shape = reader.get_shape().unwrap(); - let mut shape: Vec<isize> = Vec::new(); + let mut shape: Vec<isize> = Vec::new(); for i in 0..read_shape.len() { - shape.push(read_shape.get(i) as usize) + shape.push(read_shape.get(i) as isize) } ReshapeConfig { shape: shape }