fff-rs · orangmuda · Apr 25, 2021 · drahnr · Oct 19, 2021 · drahnr
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/juice-examples/mackey-glass-rnn-regression/src/main.rs b/juice-examples/mackey-glass-rnn-regression/src/main.rs
@@ -104,7 +104,7 @@ fn create_network(batch_size: usize, columns: usize) -> SequentialConfig {
     // Reshape the input into NCHW Format
     net_cfg.add_layer(LayerConfig::new(
         "reshape",
-        LayerType::Reshape(ReshapeConfig::of_shape(&[batch_size, DATA_COLUMNS, 1, 1])),
+        LayerType::Reshape(ReshapeConfig::of_shape(&[-1, DATA_COLUMNS as isize, 1, 1])),
     ));
 
     net_cfg.add_layer(LayerConfig::new(
@@ -168,7 +168,6 @@ pub(crate) fn train(
     // Initialise a Sequential Layer
     let net_cfg = create_network(batch_size, DATA_COLUMNS);
     let mut solver = add_solver(backend, net_cfg,  batch_size, learning_rate, momentum);
-
     // Define Input & Labels
     let input = SharedTensor::<f32>::new(&[batch_size, 1, DATA_COLUMNS]);
     let input_lock = Arc::new(RwLock::new(input));
@@ -224,7 +223,8 @@ pub(crate) fn train(
 pub(crate) fn test(backend: Rc<Backend<Cuda>>, batch_size: usize, file: &Path) -> Result<(), Box<dyn std::error::Error>> {
     // Load in a pre-trained network
     let mut network: Layer<Backend<Cuda>> = Layer::<Backend<Cuda>>::load(backend, file)?;
-
+    dbg!(&network);
+    panic!("End");
     // Define Input & Labels
     let input = SharedTensor::<f32>::new(&[batch_size, 1, DATA_COLUMNS]);
     let input_lock = Arc::new(RwLock::new(input));
@@ -262,7 +262,7 @@ pub(crate) fn test(backend: Rc<Backend<Cuda>>, batch_size: usize, file: &Path) -
 }
 
 fn main() {
-    env_logger::builder().filter_level(log::LevelFilter::Info).init();
+    env_logger::builder().filter_level(log::LevelFilter::Trace).init();
     // Parse Arguments
     let args: Args = docopt::Docopt::new(MAIN_USAGE)
         .and_then(|d| d.deserialize())

diff --git a/juice-examples/mnist-image-multiclass-classification/src/main.rs b/juice-examples/mnist-image-multiclass-classification/src/main.rs
@@ -140,7 +140,7 @@ fn add_conv_net(
 ) -> SequentialConfig {
     net_cfg.add_layer(LayerConfig::new(
         "reshape",
-        ReshapeConfig::of_shape(&[batch_size, 1, pixel_dim, pixel_dim]),
+        ReshapeConfig::of_shape(&[batch_size as isize, 1, pixel_dim as isize, pixel_dim as isize]),
     ));
     net_cfg.add_layer(LayerConfig::new(
         "conv",
@@ -192,7 +192,7 @@ fn add_mlp(
 ) -> SequentialConfig {
     net_cfg.add_layer(LayerConfig::new(
         "reshape",
-        LayerType::Reshape(ReshapeConfig::of_shape(&[batch_size, pixel_count])),
+        LayerType::Reshape(ReshapeConfig::of_shape(&[batch_size as isize, pixel_count as isize])),
     ));
     net_cfg.add_layer(LayerConfig::new(
         "linear1",

diff --git a/juice/Cargo.toml b/juice/Cargo.toml
@@ -28,6 +28,7 @@ rand = "0.8"
 num = "0.4"
 capnp = "0.14"
 timeit = "0.1"
+anyhow = "1.0"
 
 [build-dependencies]
 capnpc = "0.14"

diff --git a/juice/capnp/juice.capnp b/juice/capnp/juice.capnp
@@ -108,5 +108,5 @@ struct NegativeLogLikelihoodConfig {
 }
 
 struct ReshapeConfig {
-  shape @0 :List(UInt64);
+  shape @0 :List(Int64);
 }
diff --git a/juice/src/layers/utility/reshape.rs b/juice/src/layers/utility/reshape.rs
@@ -17,15 +17,16 @@
 //! - `W` : width
 
 use crate::capnp_util::*;
-use crate::co::{IBackend, SharedTensor};
+use crate::co::{IBackend, SharedTensor, TensorDesc};
 use crate::juice_capnp::reshape_config as capnp_config;
 use crate::layer::*;
 use crate::util::ArcLock;
+use anyhow::{anyhow, Result};
 
 #[derive(Debug, Clone)]
 /// Reshape Utility Layer
 pub struct Reshape {
-    shape: Vec<usize>,
+    shape: Vec<isize>,
 }
 
 impl Reshape {
@@ -35,6 +36,38 @@ impl Reshape {
             shape: config.shape.clone(),
         }
     }
+
+    /// Resolves the configured shape against `input_shape` into concrete dimensions.
+    ///
+    /// A single `-1` entry is replaced by the input element count divided by the product
+    /// of the explicitly given dimensions; a shape without `-1` is returned unchanged.
+    ///
+    /// # Errors
+    /// Fails if an entry is below `-1`, if more than one entry is `-1`, or if the input
+    /// element count is not an exact multiple of the product of the given dimensions.
+    fn evaluate_shape(&self, input_shape: &TensorDesc) -> Result<Vec<usize>> {
+        if self.shape.iter().any(|&dim| dim < -1) {
+            return Err(anyhow!("Invalid elements provided to Reshape"));
+        }
+        match self.shape.iter().filter(|&&dim| dim == -1).count() {
+            0 => Ok(self.shape.iter().map(|&dim| dim as usize).collect()),
+            1 => {
+                let total_prior_elements: usize = input_shape.iter().product();
+                let known_elements = self.shape.iter().filter(|&&dim| dim > -1).product::<isize>() as usize;
+                // A zero-sized known dimension would otherwise panic on the division below.
+                if known_elements == 0 || total_prior_elements % known_elements != 0 {
+                    return Err(anyhow!(
+                        "Dimensions {:?} do not cleanly reshape into {:?}",
+                        input_shape, self.shape
+                    ));
+                }
+                let unknown_element = total_prior_elements / known_elements;
+                let resolve = |&dim: &isize| if dim == -1 { unknown_element } else { dim as usize };
+                Ok(self.shape.iter().map(resolve).collect())
+            }
+            _ => Err(anyhow!("More than 1 unknown element provided to Reshape")),
+        }
+    }
 }
 
 impl<B: IBackend> ILayer<B> for Reshape {
@@ -56,8 +89,19 @@ impl<B: IBackend> ILayer<B> for Reshape {
         output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
         output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
     ) {
-        output_data[0].write().unwrap().resize(&self.shape).unwrap();
-        output_gradient[0].write().unwrap().resize(&self.shape).unwrap();
+        // Shape Evaluation has to be done at run-time.
+        if !input_data.is_empty() {
+            let output_shape : Vec<usize> = match input_data[0].read() {
+                Ok(tensor) => self.evaluate_shape(tensor.desc()).unwrap(),
+                Err(E) => panic!("")
+            };
+            output_data[0].write().unwrap().resize(&output_shape).unwrap();
+            let output_grad_shape : Vec<usize> = match input_gradient[0].read() {
+                Ok(tensor) => self.evaluate_shape(tensor.desc()).unwrap(),
+                Err(E) => panic!("")
+            };
+            output_gradient[0].write().unwrap().resize(&output_grad_shape).unwrap();
+        }
     }
 }
 
@@ -92,15 +136,20 @@ impl<B: IBackend> ComputeParametersGradient<f32, B> for Reshape {}
 pub struct ReshapeConfig {
     /// The target shape that the input should assume.
     ///
-    /// Preceding dimensions are treated as independent inputs
+    /// Preceding dimensions are treated as independent inputs. At most one value can be -1,
+    /// meaning that dimension is inferred from the input's total element count, e.g.
+    /// Input [2, 8] -> Reshape [-1, 4] -> Output [4, 4]:
+    /// the input has 16 elements and 16 / 4 is 4, so the output is [4, 4].
+    ///
+    /// Causes an error if the total element count is incompatible with the dimensions selected.
     ///
     /// Defaults to `1`
-    pub shape: Vec<usize>,
+    pub shape: Vec<isize>,
 }
 
 impl ReshapeConfig {
     /// Create a ReshapeConfig that describes a Reshape layer with a provided shape.
-    pub fn of_shape(shape: &[usize]) -> ReshapeConfig {
+    pub fn of_shape(shape: &[isize]) -> ReshapeConfig {
         ReshapeConfig {
             shape: shape.to_owned(),
         }
@@ -114,7 +163,7 @@ impl<'a> CapnpWrite<'a> for ReshapeConfig {
     fn write_capnp(&self, builder: &mut Self::Builder) {
         let mut shape = builder.reborrow().init_shape(self.shape.len() as u32);
         for (i, dim) in self.shape.iter().enumerate() {
-            shape.set(i as u32, *dim as u64);
+            shape.set(i as u32, *dim as i64);
         }
     }
 }
@@ -124,9 +173,9 @@ impl<'a> CapnpRead<'a> for ReshapeConfig {
 
     fn read_capnp(reader: Self::Reader) -> Self {
         let read_shape = reader.get_shape().unwrap();
-        let mut shape = Vec::new();
+        let mut shape: Vec<isize> = Vec::new();
         for i in 0..read_shape.len() {
-            shape.push(read_shape.get(i) as usize)
+            shape.push(read_shape.get(i) as isize)
         }
 
         ReshapeConfig { shape: shape }