
Refactor RNNs to allow initialisation of the previous output and states.
The previous output and state of recurrent layers are saved. This makes the
layers more flexible and compatible with streaming mode. Fixes #230
Sebastian Böck committed Dec 6, 2016
1 parent 9a0b40b · commit 1e8cd39
Showing 3 changed files with 174 additions and 49 deletions.
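In practice, the new `reset` flag makes chunk-wise (streaming) processing equivalent to processing the whole sequence at once. A minimal sketch, assuming only the `RecurrentLayer` API shown in the diff below; shapes and weights are arbitrary:

import numpy as np
from madmom.ml.nn.layers import RecurrentLayer

num_inputs, num_hiddens = 4, 3
rng = np.random.RandomState(0)
layer = RecurrentLayer(rng.randn(num_inputs, num_hiddens),   # weights
                       rng.randn(num_hiddens),               # bias
                       rng.randn(num_hiddens, num_hiddens),  # recurrent weights
                       np.tanh)                              # activation_fn

data = rng.randn(10, num_inputs)
# offline mode: reset the state and process the whole sequence at once
offline = layer.activate(data)
# streaming mode: reset once, then carry the state across chunks
layer.reset()
online = np.vstack([layer.activate(chunk, reset=False)
                    for chunk in np.split(data, 5)])
assert np.allclose(offline, online)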
CHANGES.rst: 2 additions & 0 deletions
@@ -17,11 +17,13 @@ Bug fixes:

API relevant changes:

* Reorder `GRUCell` parameters, to be consistent with all other layers (#235)
+* Rename `GRULayer` parameters, to be consistent with all other layers (#235)

Other changes:

* `num_threads` is passed to `ParallelProcessor` in single mode (#217)
* Use `install_requires` in `setup.py` to specify dependencies (#226)
+* Allow initialisation of previous/hidden states in RNNs (#235)
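Taken together, the two #235 entries change the GRU API as follows. A sketch only: `reset_gate`, `update_gate`, `cell` and the state vector `h0` are hypothetical, already constructed objects (see the layers.py diff below for the actual implementation):

# before: the initial hidden state was passed as `hid_init`
layer = GRULayer(reset_gate, update_gate, cell, hid_init=h0)
# after:  the parameter is named `init`, like in the other layers,
# and the state can be kept between calls to activate()
layer = GRULayer(reset_gate, update_gate, cell, init=h0)
acts = layer.activate(data, reset=False)  # continue from the stored state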


Version 0.14.1 (release date: 2016-08-01)
madmom/ml/nn/layers.py: 147 additions & 41 deletions
@@ -98,43 +98,75 @@ class RecurrentLayer(FeedForwardLayer):
        Recurrent weights.
    activation_fn : numpy ufunc
        Activation function.
+    init : numpy array, shape (num_hiddens,), optional
+        Initial state of hidden units.

    """

-    def __init__(self, weights, bias, recurrent_weights, activation_fn):
+    def __init__(self, weights, bias, recurrent_weights, activation_fn,
+                 init=None):
        super(RecurrentLayer, self).__init__(weights, bias, activation_fn)
        self.recurrent_weights = recurrent_weights
+        if init is None:
+            self.init = np.zeros(self.bias.size, dtype=NN_DTYPE)
+        else:
+            self.init = init
+        self._prev = self.init.copy()

-    def activate(self, data):
+    def activate(self, data, reset=True):
        """
        Activate the layer.

        Parameters
        ----------
        data : numpy array, shape (num_frames, num_inputs)
            Activate with this data.
+        reset : bool, optional
+            Reset the state of the layer before activating it.

        Returns
        -------
        numpy array, shape (num_frames, num_hiddens)
            Activations for this data.

        """
        # if we don't have recurrent weights, we don't have to loop
        if self.recurrent_weights is None:
            return super(RecurrentLayer, self).activate(data)
+        # init / reset previous time step
+        if reset:
+            self._init()
+            self.reset()
        # weight input and add bias
        out = np.dot(data, self.weights) + self.bias
        # loop through all time steps
        for i in range(len(data)):
            # add weighted previous step
-            if i >= 1:
-                out[i] += np.dot(out[i - 1], self.recurrent_weights)
+            out[i] += np.dot(self._prev, self.recurrent_weights)
            # apply activation function
            self.activation_fn(out[i], out=out[i])
+            # save current output
+            self._prev = out[i]
        # return
        return out

+    def _init(self):
+        # TODO: remove this initialisation code after updating the models
+        if not hasattr(self, 'init'):
+            self.init = np.zeros(self.bias.size, dtype=NN_DTYPE)
+
+    def reset(self, init=None):
+        """
+        Reset the layer to its initial state.
+
+        Parameters
+        ----------
+        init : numpy array, shape (num_hiddens,), optional
+            Reset the hidden units to this initial state.
+
+        """
+        if init is not None:
+            self._prev = init
+        else:
+            self._prev = self.init.copy()
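The rewritten loop computes the standard simple-RNN recurrence, with `self._prev` replacing the zero vector the old code implicitly assumed for the first frame. As a standalone NumPy reference, a simplified sketch without the class machinery:

import numpy as np

def recurrent_activate(data, weights, bias, recurrent_weights, prev,
                       activation_fn=np.tanh):
    # out[t] = activation_fn(data[t] . W + b + prev . R), where `prev`
    # is carried across calls instead of starting from zeros each time
    out = np.dot(data, weights) + bias
    for i in range(len(data)):
        out[i] += np.dot(prev, recurrent_weights)
        activation_fn(out[i], out=out[i])
        prev = out[i]
    return out, prev  # return the final state for the next chunk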


class BidirectionalLayer(Layer):
    """
@@ -283,66 +315,112 @@ class LSTMLayer(Layer):
        Output gate.
    activation_fn : numpy ufunc, optional
        Activation function.
+    init : numpy array, shape (num_hiddens, ), optional
+        Initial state of the layer.
+    cell_init : numpy array, shape (num_hiddens, ), optional
+        Initial state of the cell.

    """

    def __init__(self, input_gate, forget_gate, cell, output_gate,
-                 activation_fn=tanh):
+                 activation_fn=tanh, init=None, cell_init=None):
        self.input_gate = input_gate
        self.forget_gate = forget_gate
        self.cell = cell
        self.output_gate = output_gate
        self.activation_fn = activation_fn

-    def activate(self, data):
+        # keep the state of the layer and cell
+        if init is None:
+            self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
+        else:
+            self.init = init
+        self._prev = self.init.copy()
+        if cell_init is None:
+            self.cell_init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
+        else:
+            self.cell_init = cell_init
+        self._state = self.cell_init.copy()
+
+    def activate(self, data, reset=True):
        """
        Activate the LSTM layer.

        Parameters
        ----------
        data : numpy array, shape (num_frames, num_inputs)
            Activate with this data.
+        reset : bool, optional
+            Reset the state of the layer before activating it.

        Returns
        -------
        numpy array, shape (num_frames, num_hiddens)
            Activations for this data.

        """
+        # init / reset previous time step
+        if reset:
+            self._init()
+            self.reset()
        # init arrays
        size = len(data)
        # output matrix for the whole sequence
        out = np.zeros((size, self.cell.bias.size), dtype=NN_DTYPE)
-        # output (of the previous time step)
-        out_ = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
-        # state (of the previous time step)
-        state_ = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
        # process the input data
        for i in range(size):
            # cache input data
            data_ = data[i]
            # input gate:
-            # operate on current data, previous state and previous output
-            ig = self.input_gate.activate(data_, out_, state_)
+            # operate on current data, previous output and state
+            ig = self.input_gate.activate(data_, self._prev, self._state)
            # forget gate:
-            # operate on current data, previous state and previous output
-            fg = self.forget_gate.activate(data_, out_, state_)
+            # operate on current data, previous output and state
+            fg = self.forget_gate.activate(data_, self._prev, self._state)
            # cell:
            # operate on current data and previous output
-            cell = self.cell.activate(data_, out_)
+            cell = self.cell.activate(data_, self._prev)
            # internal state:
            # weight the cell with the input gate
            # and add the previous state weighted by the forget gate
-            state_ = cell * ig + state_ * fg
+            self._state = cell * ig + self._state * fg
            # output gate:
-            # operate on current data, current state and previous output
-            og = self.output_gate.activate(data_, out_, state_)
+            # operate on current data, previous output and current state
+            og = self.output_gate.activate(data_, self._prev, self._state)
            # output:
            # apply activation function to state and weight by output gate
-            out_ = self.activation_fn(state_) * og
-            out[i] = out_
+            out[i] = self.activation_fn(self._state) * og
+            # save current output
+            self._prev = out[i]
        return out

+    def _init(self):
+        # TODO: remove this initialisation code after updating the models
+        if not hasattr(self, 'init'):
+            self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
+        if not hasattr(self, 'cell_init'):
+            self.cell_init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
+
+    def reset(self, init=None, cell_init=None):
+        """
+        Reset the layer to its initial state.
+
+        Parameters
+        ----------
+        init : numpy array, shape (num_hiddens,), optional
+            Reset the hidden units to this initial state.
+        cell_init : numpy array, shape (num_hiddens,), optional
+            Reset the cells to this initial state.
+
+        """
+        if init is not None:
+            self._prev = init
+        else:
+            self._prev = self.init.copy()
+        if cell_init is not None:
+            self._state = cell_init
+        else:
+            self._state = self.cell_init.copy()
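With both `self._prev` and `self._state` persisted, an LSTM layer can run in streaming mode the same way. A hedged sketch: `lstm` stands for an already constructed `LSTMLayer`, and `chunks` is any iterable of (num_frames, num_inputs) arrays:

import numpy as np

def activate_stream(lstm, chunks):
    # start from the layer's initial output / cell state, then keep
    # both alive across chunk boundaries
    lstm.reset()
    return np.vstack([lstm.activate(chunk, reset=False)
                      for chunk in chunks])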


class GRUCell(object):
    """
@@ -411,7 +489,7 @@ def activate(self, data, prev, reset_gate):
        return self.activation_fn(out)


-class GRULayer(Layer):
+class GRULayer(RecurrentLayer):
    """
    Recurrent network layer with Gated Recurrent Units (GRU) as proposed in
    [1]_.
@@ -424,7 +502,7 @@ class GRULayer(Layer):
        Update gate.
    cell : :class:`GRUCell`
        GRU cell.
-    hid_init : numpy array, shape (num_hiddens,), optional
+    init : numpy array, shape (num_hiddens,), optional
        Initial state of hidden units.

    References
@@ -443,54 +521,82 @@
"""

def __init__(self, reset_gate, update_gate, cell, hid_init=None):
def __init__(self, reset_gate, update_gate, cell, init=None):
# init the gates
self.reset_gate = reset_gate
self.update_gate = update_gate
self.cell = cell
if hid_init is None:
hid_init = np.zeros(cell.bias.size, dtype=NN_DTYPE)
self.hid_init = hid_init

def activate(self, data):
# keep the state of the layer
if init is None:
self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)
else:
self.init = init
self._prev = self.init.copy()

def activate(self, data, reset=True):
"""
Activate the GRU layer.
Parameters
----------
data : numpy array, shape (num_frames, num_inputs)
Activate with this data.
reset : bool, optional
Reset the state of the layer before activating it.
Returns
-------
numpy array, shape (num_frames, num_hiddens)
Activations for this data.
"""
# init / reset previous time step
if reset:
self._init()
self.reset()
# init arrays
size = len(data)
# output matrix for the whole sequence
out = np.zeros((size, self.update_gate.bias.size), dtype=NN_DTYPE)
# output (of the previous time step)
out_ = self.hid_init
out = np.zeros((size, self.cell.bias.size), dtype=NN_DTYPE)
# process the input data
for i in range(size):
# cache input data
data_ = data[i]
# reset gate:
# operate on current data and previous output
rg = self.reset_gate.activate(data_, out_)
rg = self.reset_gate.activate(data_, self._prev)
# update gate:
# operate on current data and previous output
ug = self.update_gate.activate(data_, out_)
ug = self.update_gate.activate(data_, self._prev)
# cell (implemented as in [1]):
# operate on current data, previous output and reset gate
cell = self.cell.activate(data_, out_, rg)
# output (activation)
out_ = ug * cell + (1 - ug) * out_
out[i] = out_
cell = self.cell.activate(data_, self._prev, rg)
# output:
out[i] = ug * cell + (1 - ug) * self._prev
# save current output
self._prev = out[i]
return out

def _init(self):
# TODO: remove this initialisation code after updating the models
if not hasattr(self, 'init'):
self.init = np.zeros(self.cell.bias.size, dtype=NN_DTYPE)

def reset(self, init=None):
"""
Reset the layer to its initial state.
Parameters
----------
init : scalar or numpy array, shape (num_hiddens,), optional
Reset the hidden units to this initial state.
"""
if init is not None:
self._prev = init
else:
self._prev = self.init.copy()
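Since `GRULayer` now exposes the same `reset` API as `RecurrentLayer`, a previously saved hidden state can be restored before continuing. A sketch with a hypothetical helper: `layer` is a built `GRULayer` or `RecurrentLayer`, and `state` an array of shape (num_hiddens,):

def resume_from(layer, data, state):
    # seed the hidden units with a previously saved state and continue
    # from there; without arguments, reset() falls back to the `init`
    # value given at construction time
    layer.reset(init=state)
    return layer.activate(data, reset=False)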


def _kernel_margins(kernel_shape, margin_shift):
"""
