
Refactor RNNs to allow initialisation of the previous output and states.
The previous output and state of recurrent layers are saved. This makes the
layers more flexible and compatible with streaming mode. Fixes #230
Sebastian Böck committed Dec 6, 2016
1 parent 9a0b40b · commit 1e8cd39
Showing 3 changed files with 174 additions and 49 deletions.
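In practice, the new `reset` flag makes chunk-wise (streaming) processing equivalent to processing the whole sequence at once. A minimal sketch, assuming only the `RecurrentLayer` API shown in the diff below; shapes and weights are arbitrary:

import numpy as np
from madmom.ml.nn.layers import RecurrentLayer

num_inputs, num_hiddens = 4, 3
rng = np.random.RandomState(0)
layer = RecurrentLayer(rng.randn(num_inputs, num_hiddens),   # weights
                       rng.randn(num_hiddens),               # bias
                       rng.randn(num_hiddens, num_hiddens),  # recurrent weights
                       np.tanh)                              # activation_fn

data = rng.randn(10, num_inputs)
# offline mode: reset the state and process the whole sequence at once
offline = layer.activate(data)
# streaming mode: reset once, then carry the state across chunks
layer.reset()
online = np.vstack([layer.activate(chunk, reset=False)
                    for chunk in np.split(data, 5)])
assert np.allclose(offline, online)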
CHANGES.rst: 2 additions & 0 deletions
@@ -17,11 +17,13 @@ Bug fixes:

API relevant changes:

* Reorder `GRUCell` parameters, to be consistent with all other layers (#235)
+* Rename `GRULayer` parameters, to be consistent with all other layers (#235)

Other changes:

* `num_threads` is passed to `ParallelProcessor` in single mode (#217)
* Use `install_requires` in `setup.py` to specify dependencies (#226)
+* Allow initialisation of previous/hidden states in RNNs (#235)
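Taken together, the two #235 entries change the GRU API as follows. A sketch only: `reset_gate`, `update_gate`, `cell` and the state vector `h0` are hypothetical, already constructed objects (see the layers.py diff below for the actual implementation):

# before: the initial hidden state was passed as `hid_init`
layer = GRULayer(reset_gate, update_gate, cell, hid_init=h0)
# after:  the parameter is named `init`, like in the other layers,
# and the state can be kept between calls to activate()
layer = GRULayer(reset_gate, update_gate, cell, init=h0)
acts = layer.activate(data, reset=False)  # continue from the stored state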


Version 0.14.1 (release date: 2016-08-01)
madmom/ml/nn/layers.py: 147 additions & 41 deletions
@@ -98,43 +98,75 @@ class RecurrentLayer(FeedForwardLayer):
        Recurrent weights.
    activation_fn : numpy ufunc
        Activation function.
+    init : numpy array, shape (num_hiddens,), optional
+        Initial state of hidden units.

    """

-    def __init__(self, weights, bias, recurrent_weights, activation_fn):
+    def __init__(self, weights, bias, recurrent_weights, activation_fn,
+                 init=None):
        super(RecurrentLayer, self).__init__(weights, bias, activation_fn)
        self.recurrent_weights = recurrent_weights
+        if init is None:
+            self.init = np.zeros(self.bias.size, dtype=NN_DTYPE)
+        else:
+            self.init = init
+        self._prev = self.init.copy()

-    def activate(self, data):
+    def activate(self, data, reset=True):
        """
        Activate the layer.

        Parameters
        ----------
        data : numpy array, shape (num_frames, num_inputs)
            Activate with this data.
+        reset : bool, optional
+            Reset the state of the layer before activating it.

        Returns
        -------
        numpy array, shape (num_frames, num_hiddens)
            Activations for this data.

        """
        # if we don't have recurrent weights, we don't have to loop
        if self.recurrent_weights is None:
            return super(RecurrentLayer, self).activate(data)
+        # init / reset previous time step
+        if reset:
+            self._init()
+            self.reset()
        # weight input and add bias
        out = np.dot(data, self.weights) + self.bias
        # loop through all time steps
        for i in range(len(data)):
            # add weighted previous step
-            if i >= 1:
-                out[i] += np.dot(out[i - 1], self.recurrent_weights)
+            out[i] += np.dot(self._prev, self.recurrent_weights)
            # apply activation function
            self.activation_fn(out[i], out=out[i])
+            # save current output
+            self._prev = out[i]
        # return
        return out

+    def _init(self):
+        # TODO: remove this initialisation code after updating the models
+        if not hasattr(self, 'init'):
+            self.init = np.zeros(self.bias.size, dtype=NN_DTYPE)
+
+    def reset(self, init=None):
+        """
+        Reset the layer to its initial state.
+
+        Parameters
+        ----------
+        init : numpy array, shape (num_hiddens,), optional
+            Reset the hidden units to this initial state.
+
+        """
+        if init is not None:
+            self._prev = init
+        else:
+            self._prev = self.init.copy()
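The rewritten loop computes the standard simple-RNN recurrence, with `self._prev` replacing the zero vector the old code implicitly assumed for the first frame. As a standalone NumPy reference, a simplified sketch without the class machinery:

import numpy as np

def recurrent_activate(data, weights, bias, recurrent_weights, prev,
                       activation_fn=np.tanh):
    # out[t] = activation_fn(data[t] . W + b + prev . R), where `prev`
    # is carried across calls instead of starting from zeros each time
    out = np.dot(data, weights) + bias
    for i in range(len(data)):
        out[i] += np.dot(prev, recurrent_weights)
        activation_fn(out[i], out=out[i])
        prev = out[i]
    return out, prev  # return the final state for the next chunk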


class BidirectionalLayer(Layer):
    """
@@ -283,66 +315,112 @@ class LSTMLayer(Layer):
        Output gate.
    activation_fn : numpy ufunc, optional
        Activation function.
+    init : numpy array, shape (num_hiddens, ), optional
+        Initial state of the layer.
+    cell_init : numpy array, shape (num_hiddens, ), optional
+        Initial state of the cell.

    """

    def __init__(self, input_gate, forget_gate, cell, output_gate,
-                 activation_fn=tanh):
+                 activation_fn=tanh, init=None, cell_init=None):
        self.input_gate = input_gate
        self.forget_gate = forget_gate
        self.cell = cell
        self.output_gate = output_gate
        self.activation_fn = activation_fn

-    def activate(self, data):
+        # keep the state of the layer and cell
+        if init is None:
+            self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
+        else:
+            self.init = init
+        self._prev = self.init.copy()
+        if cell_init is None:
+            self.cell_init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
+        else:
+            self.cell_init = cell_init
+        self._state = self.cell_init.copy()
+
+    def activate(self, data, reset=True):
        """
        Activate the LSTM layer.

        Parameters
        ----------
        data : numpy array, shape (num_frames, num_inputs)
            Activate with this data.
+        reset : bool, optional
+            Reset the state of the layer before activating it.

        Returns
        -------
        numpy array, shape (num_frames, num_hiddens)
            Activations for this data.

        """
+        # init / reset previous time step
+        if reset:
+            self._init()
+            self.reset()
        # init arrays
        size = len(data)
        # output matrix for the whole sequence
        out = np.zeros((size, self.cell.bias.size), dtype=NN_DTYPE)
-        # output (of the previous time step)
-        out_ = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
-        # state (of the previous time step)
-        state_ = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
        # process the input data
        for i in range(size):
            # cache input data
            data_ = data[i]
            # input gate:
-            # operate on current data, previous state and previous output
-            ig = self.input_gate.activate(data_, out_, state_)
+            # operate on current data, previous output and state
+            ig = self.input_gate.activate(data_, self._prev, self._state)
            # forget gate:
-            # operate on current data, previous state and previous output
-            fg = self.forget_gate.activate(data_, out_, state_)
+            # operate on current data, previous output and state
+            fg = self.forget_gate.activate(data_, self._prev, self._state)
            # cell:
            # operate on current data and previous output
-            cell = self.cell.activate(data_, out_)
+            cell = self.cell.activate(data_, self._prev)
            # internal state:
            # weight the cell with the input gate
            # and add the previous state weighted by the forget gate
-            state_ = cell * ig + state_ * fg
+            self._state = cell * ig + self._state * fg
            # output gate:
-            # operate on current data, current state and previous output
-            og = self.output_gate.activate(data_, out_, state_)
+            # operate on current data, previous output and current state
+            og = self.output_gate.activate(data_, self._prev, self._state)
            # output:
            # apply activation function to state and weight by output gate
-            out_ = self.activation_fn(state_) * og
-            out[i] = out_
+            out[i] = self.activation_fn(self._state) * og
+            # save current output
+            self._prev = out[i]
        return out

+    def _init(self):
+        # TODO: remove this initialisation code after updating the models
+        if not hasattr(self, 'init'):
+            self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
+        if not hasattr(self, 'cell_init'):
+            self.cell_init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
+
+    def reset(self, init=None, cell_init=None):
+        """
+        Reset the layer to its initial state.
+
+        Parameters
+        ----------
+        init : numpy array, shape (num_hiddens,), optional
+            Reset the hidden units to this initial state.
+        cell_init : numpy array, shape (num_hiddens,), optional
+            Reset the cells to this initial state.
+
+        """
+        if init is not None:
+            self._prev = init
+        else:
+            self._prev = self.init.copy()
+        if cell_init is not None:
+            self._state = cell_init
+        else:
+            self._state = self.cell_init.copy()
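With both `self._prev` and `self._state` persisted, an LSTM layer can run in streaming mode the same way. A hedged sketch: `lstm` stands for an already constructed `LSTMLayer`, and `chunks` is any iterable of (num_frames, num_inputs) arrays:

import numpy as np

def activate_stream(lstm, chunks):
    # start from the layer's initial output / cell state, then keep
    # both alive across chunk boundaries
    lstm.reset()
    return np.vstack([lstm.activate(chunk, reset=False)
                      for chunk in chunks])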


class GRUCell(object):
    """
@@ -411,7 +489,7 @@ def activate(self, data, prev, reset_gate):
        return self.activation_fn(out)


-class GRULayer(Layer):
+class GRULayer(RecurrentLayer):
    """
    Recurrent network layer with Gated Recurrent Units (GRU) as proposed in
    [1]_.
@@ -424,7 +502,7 @@ class GRULayer(Layer):
        Update gate.
    cell : :class:`GRUCell`
        GRU cell.
-    hid_init : numpy array, shape (num_hiddens,), optional
+    init : numpy array, shape (num_hiddens,), optional
        Initial state of hidden units.

    References
@@ -443,54 +521,82 @@
"""

def __init__(self, reset_gate, update_gate, cell, hid_init=None):
def __init__(self, reset_gate, update_gate, cell, init=None):
# init the gates
self.reset_gate = reset_gate
self.update_gate = update_gate
self.cell = cell
if hid_init is None:
hid_init = np.zeros(cell.bias.size, dtype=NN_DTYPE)
self.hid_init = hid_init

def activate(self, data):
# keep the state of the layer
if init is None:
self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
else:
self.init = init
self._prev = self.init.copy()

def activate(self, data, reset=True):
"""
Activate the GRU layer.
Parameters
----------
data : numpy array, shape (num_frames, num_inputs)
Activate with this data.
reset : bool, optional
Reset the state of the layer before activating it.
Returns
-------
numpy array, shape (num_frames, num_hiddens)
Activations for this data.
"""
# init / reset previous time step
if reset:
self._init()
self.reset()
# init arrays
size = len(data)
# output matrix for the whole sequence
out = np.zeros((size, self.update_gate.bias.size), dtype=NN_DTYPE)
# output (of the previous time step)
out_ = self.hid_init
out = np.zeros((size, self.cell.bias.size), dtype=NN_DTYPE)
# process the input data
for i in range(size):
# cache input data
data_ = data[i]
# reset gate:
# operate on current data and previous output
rg = self.reset_gate.activate(data_, out_)
rg = self.reset_gate.activate(data_, self._prev)
# update gate:
# operate on current data and previous output
ug = self.update_gate.activate(data_, out_)
ug = self.update_gate.activate(data_, self._prev)
# cell (implemented as in [1]):
# operate on current data, previous output and reset gate
cell = self.cell.activate(data_, out_, rg)
# output (activation)
out_ = ug * cell + (1 - ug) * out_
out[i] = out_
cell = self.cell.activate(data_, self._prev, rg)
# output:
out[i] = ug * cell + (1 - ug) * self._prev
# save current output
self._prev = out[i]
return out

def _init(self):
# TODO: remove this initialisation code after updating the models
if not hasattr(self, 'init'):
self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)

def reset(self, init=None):
"""
Reset the layer to its initial state.
Parameters
----------
init : scalar or numpy array, shape (num_hiddens,), optional
Reset the hidden units to this initial state.
"""
if init is not None:
self._prev = init
else:
self._prev = self.init.copy()
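Since `GRULayer` now exposes the same `reset` API as `RecurrentLayer`, a previously saved hidden state can be restored before continuing. A sketch with a hypothetical helper: `layer` is a built `GRULayer` or `RecurrentLayer`, and `state` an array of shape (num_hiddens,):

def resume_from(layer, data, state):
    # seed the hidden units with a previously saved state and continue
    # from there; without arguments, reset() falls back to the `init`
    # value given at construction time
    layer.reset(init=state)
    return layer.activate(data, reset=False)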


def _kernel_margins(kernel_shape, margin_shift):
"""
