Merge pull request #4 from fchollet/master
update master
olegsinavski committed Dec 16, 2015
2 parents 5e06aa5 + 42b3d37 commit ca37f96
Showing 9 changed files with 408 additions and 52 deletions.
4 changes: 4 additions & 0 deletions docs/autogen.py
@@ -80,6 +80,10 @@ def get_method_signature(method):
for a in args:
st += str(a) + ', '
for a, v in kwargs:
if type(v) == str:
v = '\'' + v + '\''
elif type(v) == unicode:
v = 'u\'' + v + '\''
st += str(a) + '=' + str(v) + ', '
if kwargs or args:
return st[:-2] + ')'
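For illustration, the change above quotes string (and, under Python 2, unicode) default values so that generated signatures read as valid Python. A minimal standalone sketch of the same formatting logic, using a hypothetical `format_signature` helper rather than the real autogen code:

```python
# Hypothetical helper mirroring the snippet above; not part of docs/autogen.py.
def format_signature(name, args, kwargs):
    st = name + '('
    for a in args:
        st += str(a) + ', '
    for a, v in kwargs:           # kwargs is a list of (name, default) pairs
        if isinstance(v, str):
            v = '\'' + v + '\''   # quote string defaults: padding='pre', not padding=pre
        st += str(a) + '=' + str(v) + ', '
    if args or kwargs:
        return st[:-2] + ')'
    return st + ')'

print(format_signature('pad_sequences', ['sequences'], [('padding', 'pre'), ('value', 0.)]))
# pad_sequences(sequences, padding='pre', value=0.0)
```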
2 changes: 1 addition & 1 deletion keras/backend/theano_backend.py
@@ -412,7 +412,7 @@ def _step(input, *states):
if masking:
# if all-zero input timestep, return
# all-zero output and unchanged states
switch = T.any(input)
switch = T.any(input, axis=-1, keepdims=True)
output = T.switch(switch, output, 0. * output)
return_states = []
for state, new_state in zip(states, new_states):
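For context: `T.any(input)` reduces over every axis and returns one scalar, so a single non-zero value anywhere in the batch would unmask every sample at that timestep. With `axis=-1, keepdims=True` the reduction produces one switch per sample that broadcasts against the output. A NumPy sketch of the difference (NumPy stands in for Theano purely for illustration):

```python
import numpy as np

# One timestep, two samples: sample 0 is all-zero (padding), sample 1 carries data.
x = np.array([[0., 0., 0.],
              [1., 2., 0.]])

np.any(x)                              # True -> one scalar for the whole batch
np.any(x, axis=-1, keepdims=True)      # [[False], [True]] -> one switch per sample

output = np.array([[0.5, 0.5],
                   [0.7, 0.3]])
switch = np.any(x, axis=-1, keepdims=True)
masked = np.where(switch, output, 0. * output)  # zeroes sample 0, keeps sample 1
```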
44 changes: 34 additions & 10 deletions keras/layers/containers.py
@@ -23,16 +23,41 @@ def __init__(self, layers=[]):
self.layer_cache = {}
for layer in layers:
self.add(layer)

def __call__(self, X, train=False):
self._cache_enabled = True

def __call__(self, X, mask=None, train=False):
# turn off layer cache temporarily
tmp_cache_enabled = self.cache_enabled
self.cache_enabled = False
# recursively search for a layer which is not a Sequential model
layer = self
while issubclass(layer.__class__, Sequential):
layer = layer.layers[0]
# set temporary input to first layer
tmp = self.layers[0].get_input
self.layers[0].get_input = lambda _: X
tmp_input = layer.get_input
tmp_mask = None
layer.get_input = lambda _: X
if hasattr(layer, 'get_input_mask'):
tmp_mask = layer.get_input_mask
layer.get_input_mask = lambda _: mask
Y = self.get_output(train=train)
# return input to first layer to what it was
self.layers[0].get_input = tmp
# return input from first layer to what it was
layer.get_input = tmp_input
if hasattr(layer, 'get_input_mask'):
layer.get_input_mask = tmp_mask
self.cache_enabled = tmp_cache_enabled
return Y

@property
def cache_enabled(self):
return self._cache_enabled

@cache_enabled.setter
def cache_enabled(self, value):
self._cache_enabled = value
for l in self.layers:
l.cache_enabled = value

def set_previous(self, layer):
self.layers[0].previous = layer
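The new `__call__` above descends to the innermost non-`Sequential` layer, temporarily points its `get_input` (and `get_input_mask`, when present) at the supplied tensors, computes the output with caching switched off, and then restores everything. A condensed standalone sketch of that swap-and-restore pattern (the `FakeLayer` class and `call_on` helper are illustrative, not Keras code):

```python
class FakeLayer(object):
    """Stand-in for a layer whose output is derived from whatever get_input returns."""
    def get_input(self, train=False):
        raise Exception('no input connected')

    def get_output(self, train=False):
        return ('output-of', self.get_input(train))


def call_on(layer, X):
    tmp_input = layer.get_input          # remember the original accessor
    layer.get_input = lambda _=False: X  # temporarily feed X as the input
    try:
        return layer.get_output(train=False)
    finally:
        layer.get_input = tmp_input      # restore, exactly as Sequential.__call__ does


layer = FakeLayer()
print(call_on(layer, 'X'))  # ('output-of', 'X'); afterwards the layer is untouched
```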

@@ -375,9 +400,7 @@ def add_shared_node(self, layer, name, inputs=[], merge_mode=None,
dot_axes: Same meaning as `dot_axes` argument of `add_node()`
outputs: Used when `merge_mode=None`. Names for the output nodes.
create_output: Same meaning as `create_output` argument of `add_node()`.
When creating an output, `merge_mode` must be specified.
'''
layer.layer_cache = self.layer_cache
if name in self.namespace:
raise Exception('Duplicate node identifier: ' + name)
for o in outputs:
@@ -408,7 +431,8 @@ def add_shared_node(self, layer, name, inputs=[], merge_mode=None,
raise Exception('Unknown identifier: ' + input)
s = Siamese(layer, layers, merge_mode,
concat_axis=concat_axis,
dot_axes=dot_axes)
dot_axes=dot_axes,
is_graph=True)
self.namespace.add(name)
self.nodes[name] = s
self.node_config.append({'name': name,
@@ -425,7 +449,7 @@ def add_shared_node(self, layer, name, inputs=[], merge_mode=None,
self.namespace.add(sh_name)
self.nodes[sh_name] = sh
self.node_config.append({'name': sh_name,
'inputs': [s],
'inputs': [name],
'create_output': create_output})
if create_output:
self.add_output(sh_name, input=sh_name)
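Two details change in `add_shared_node` here: the `Siamese` wrapper is constructed with `is_graph=True`, and each generated `SiameseHead` entry in `node_config` now records the shared node's name instead of the `Siamese` object itself. A plausible reason for the latter (an assumption, not stated in the diff) is that `node_config` feeds model serialization, where only string identifiers make sense; a tiny standalone illustration:

```python
import json

class Siamese(object):  # bare stand-in object, only used to show the contrast
    pass

s, name = Siamese(), 'shared_dense'

try:
    json.dumps({'name': 'shared_dense_0', 'inputs': [s]})        # object in the config
except TypeError as e:
    print('not serializable:', e)

print(json.dumps({'name': 'shared_dense_0', 'inputs': [name]}))  # name in the config
```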
71 changes: 51 additions & 20 deletions keras/layers/core.py
@@ -35,7 +35,8 @@ class Layer(object):
def __init__(self, **kwargs):
allowed_kwargs = {'input_shape',
'trainable',
'batch_input_shape'}
'batch_input_shape',
'cache_enabled'}
for kwarg in kwargs:
assert kwarg in allowed_kwargs, "Keyword argument not understood: " + kwarg
if 'input_shape' in kwargs:
@@ -46,14 +47,31 @@ def __init__(self, **kwargs):
self._trainable = kwargs['trainable']
if not hasattr(self, 'params'):
self.params = []
self._cache_enabled = True
if 'cache_enabled' in kwargs:
self._cache_enabled = kwargs['cache_enabled']

def __call__(self, X, train=False):
@property
def cache_enabled(self):
return self._cache_enabled

@cache_enabled.setter
def cache_enabled(self, value):
self._cache_enabled = value

def __call__(self, X, mask=None, train=False):
# set temporary input
tmp = self.get_input
tmp_input = self.get_input
tmp_mask = None
if hasattr(self, 'get_input_mask'):
tmp_mask = self.get_input_mask
self.get_input_mask = lambda _: mask
self.get_input = lambda _: X
Y = self.get_output(train=train)
# return input to what it was
self.get_input = tmp
if hasattr(self, 'get_input_mask'):
self.get_input_mask = tmp_mask
self.get_input = tmp_input
return Y

def set_previous(self, layer, connection_map={}):
@@ -132,12 +150,12 @@ def get_input(self, train=False):
if hasattr(self, 'previous'):
# to avoid redundant computations,
# layer outputs are cached when possible.
if hasattr(self, 'layer_cache'):
if hasattr(self, 'layer_cache') and self.cache_enabled:
previous_layer_id = '%s_%s' % (id(self.previous), train)
if previous_layer_id in self.layer_cache:
return self.layer_cache[previous_layer_id]
previous_output = self.previous.get_output(train=train)
if hasattr(self, 'layer_cache'):
if hasattr(self, 'layer_cache') and self.cache_enabled:
previous_layer_id = '%s_%s' % (id(self.previous), train)
self.layer_cache[previous_layer_id] = previous_output
return previous_output
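The caching hunks make the lookup conditional on the new `cache_enabled` flag as well as on the presence of a `layer_cache` dict; the key is just the upstream layer's `id()` combined with the `train` flag, i.e. plain memoization. A standalone sketch of the pattern (names are illustrative):

```python
layer_cache = {}

def expensive_get_output(layer, train):
    # placeholder for previous.get_output(train), i.e. building the symbolic graph
    return ('output', id(layer), train)

def get_cached_output(previous, train, cache_enabled=True):
    key = '%s_%s' % (id(previous), train)
    if cache_enabled and key in layer_cache:
        return layer_cache[key]
    output = expensive_get_output(previous, train)
    if cache_enabled:
        layer_cache[key] = output
    return output
```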
@@ -212,6 +230,7 @@ def get_config(self):
config['input_shape'] = self._input_shape[1:]
if hasattr(self, '_trainable'):
config['trainable'] = self._trainable
config['cache_enabled'] = self.cache_enabled
return config

def get_params(self):
@@ -458,6 +477,7 @@ def __init__(self, layers, mode='sum', concat_axis=-1, dot_axes=-1):
if p not in self.params:
self.params.append(p)
self.constraints.append(c)
super(Merge, self).__init__()

@property
def output_shape(self):
@@ -1285,6 +1305,7 @@ def __init__(self, function, output_shape=None, **kwargs):
self._output_shape = marshal.dumps(output_shape.__code__)
else:
self._output_shape = marshal.dumps(output_shape.func_code)
super(Lambda, self).__init__()

@property
def output_shape(self):
@@ -1359,6 +1380,7 @@ def __init__(self, layers, function, output_shape=None):
self._output_shape = marshal.dumps(output_shape.__code__)
else:
self._output_shape = marshal.dumps(output_shape.func_code)
super(Lambda, self).__init__()

@property
def output_shape(self):
@@ -1442,9 +1464,10 @@ class Siamese(Layer):
merge_mode: Same meaning as `mode` argument of Merge layer
concat_axis: Same meaning as `concat_axis` argument of Merge layer
dot_axes: Same meaning as `dot_axes` argument of Merge layer
is_graph: Should be set to True when used inside `Graph`
'''
def __init__(self, layer, inputs, merge_mode='concat',
concat_axis=1, dot_axes=-1):
concat_axis=1, dot_axes=-1, is_graph=False):
if merge_mode not in ['sum', 'mul', 'concat', 'ave',
'join', 'cos', 'dot', None]:
raise Exception('Invalid merge mode: ' + str(merge_mode))
@@ -1454,17 +1477,19 @@ def __init__(self, layer, inputs, merge_mode='concat',
raise Exception(merge_mode + ' merge takes exactly 2 layers')

self.layer = layer
self.trainable = layer.trainable
self.is_graph = is_graph
self.inputs = inputs
self.params = []
self.layer.set_previous(inputs[0])
self.merge_mode = merge_mode
self.concat_axis = concat_axis
self.dot_axes = dot_axes
layer.set_previous(inputs[0])
self.params = []
self.regularizers = []
self.constraints = []
self.updates = []
layers = [layer]
if merge_mode:
if merge_mode and not is_graph:
layers += inputs
for l in layers:
params, regs, consts, updates = l.get_params()
@@ -1475,6 +1500,7 @@ def __init__(self, layer, inputs, merge_mode='concat',
if p not in self.params:
self.params.append(p)
self.constraints.append(c)
super(Siamese, self).__init__()

@property
def output_shape(self):
@@ -1512,15 +1538,18 @@ def output_shape(self):
def get_params(self):
return self.params, self.regularizers, self.constraints, self.updates

def set_layer_input(self, index):
l = self.layer
while not hasattr(l, 'previous'):
l = l.layers[0]
l.previous = self.inputs[index]
def set_layer_input(self, head):
layer = self.layer
from ..layers.containers import Sequential
while issubclass(layer.__class__, Sequential):
layer = layer.layers[0]
layer.previous = self.inputs[head]

def get_output_at(self, head, train=False):
self.set_layer_input(head)
return self.layer.get_output(train)
X = self.inputs[head].get_output(train)
mask = self.inputs[head].get_output_mask(train)
Y = self.layer(X, mask)
return Y

def get_output_shape(self, head, train=False):
self.set_layer_input(head)
@@ -1621,7 +1650,7 @@ def get_output_mask(self, train=None):

def get_weights(self):
weights = self.layer.get_weights()
if self.merge_mode:
if self.merge_mode and not self.is_graph:
for m in self.inputs:
weights += m.get_weights()
return weights
@@ -1630,7 +1659,7 @@ def set_weights(self, weights):
nb_param = len(self.layer.params)
self.layer.set_weights(weights[:nb_param])
weights = weights[nb_param:]
if self.merge_mode:
if self.merge_mode and not self.is_graph:
for i in range(len(self.inputs)):
nb_param = len(self.inputs[i].params)
self.inputs[i].set_weights(weights[:nb_param])
@@ -1642,7 +1671,8 @@ def get_config(self):
'inputs': [m.get_config() for m in self.inputs],
'merge_mode': self.merge_mode,
'concat_axis': self.concat_axis,
'dot_axes': self.dot_axes}
'dot_axes': self.dot_axes,
'is_graph': self.is_graph}
base_config = super(Siamese, self).get_config()
return dict(list(base_config.items()) + list(config.items()))

@@ -1661,6 +1691,7 @@ class SiameseHead(Layer):
def __init__(self, head):
self.head = head
self.params = []
super(SiameseHead, self).__init__()

def get_output(self, train=False):
return self.get_input(train)
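In `get_weights`/`set_weights`, the input layers are now skipped when `is_graph=True`, presumably because the enclosing `Graph` already tracks those nodes' parameters and counting them twice would corrupt saved weights. A small sketch of how the flat weight list is partitioned in the two cases (plain strings stand in for parameter arrays; `split_weights` is illustrative, not the Keras method):

```python
def split_weights(layer_params, input_params, weights, is_graph):
    """Shared layer's weights come first; each input's follow unless is_graph is set."""
    nb = len(layer_params)
    layer_weights, weights = weights[:nb], weights[nb:]
    input_weights = []
    if not is_graph:
        for params in input_params:
            nb = len(params)
            input_weights.append(weights[:nb])
            weights = weights[nb:]
    return layer_weights, input_weights

flat = ['W', 'b', 'W0', 'b0', 'W1', 'b1']
print(split_weights(['W', 'b'], [['W0', 'b0'], ['W1', 'b1']], flat, is_graph=False))
# (['W', 'b'], [['W0', 'b0'], ['W1', 'b1']])
print(split_weights(['W', 'b'], [['W0', 'b0'], ['W1', 'b1']], flat, is_graph=True))
# (['W', 'b'], [])
```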
53 changes: 40 additions & 13 deletions keras/preprocessing/sequence.py
@@ -6,7 +6,7 @@

def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.):
"""
Pad each sequence to the same length:
Pad each sequence to the same length:
the length of the longest sequence.
If maxlen is provided, any sequence longer
@@ -15,6 +15,19 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.):
Supports post-padding and pre-padding (default).
Parameters:
-----------
sequences: list of lists where each element is a sequence
maxlen: int, maximum length
dtype: type to cast the resulting sequence.
padding: 'pre' or 'post', pad either before or after each sequence.
truncating: 'pre' or 'post', remove values from sequences larger than
maxlen either in the beginning or in the end of the sequence
value: float, value to pad the sequences to the desired value.
Returns:
x: numpy array with dimensions (number_of_sequences, maxlen)
"""
lengths = [len(s) for s in sequences]

@@ -47,39 +60,53 @@ def make_sampling_table(size, sampling_factor=1e-5):
This generates an array where the ith element
is the probability that a word of rank i would be sampled,
according to the sampling distribution used in word2vec.
The word2vec formula is:
p(word) = min(1, sqrt(word.frequency/sampling_factor) / (word.frequency/sampling_factor))
We assume that the word frequencies follow Zipf's law (s=1) to derive
We assume that the word frequencies follow Zipf's law (s=1) to derive
a numerical approximation of frequency(rank):
frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))
where gamma is the Euler-Mascheroni constant.
Parameters:
-----------
size: int, number of possible words to sample.
'''
gamma = 0.577
rank = np.array(list(range(size)))
rank[0] = 1
inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1./(12.*rank)
f = sampling_factor * inv_fq

return np.minimum(1., f / np.sqrt(f))


def skipgrams(sequence, vocabulary_size,
window_size=4, negative_samples=1., shuffle=True,
categorical=False, sampling_table=None):
'''
Take a sequence (list of indexes of words),
def skipgrams(sequence, vocabulary_size,
window_size=4, negative_samples=1., shuffle=True,
categorical=False, sampling_table=None):
'''
Take a sequence (list of indexes of words),
returns couples of [word_index, other_word index] and labels (1s or 0s),
where label = 1 if 'other_word' belongs to the context of 'word',
and label=0 if 'other_word' is randomly sampled
@param vocabulary_size: int. maximum possible word index + 1
@param window_size: int. actually half-window. The window of a word wi will be [i-window_size, i+window_size+1]
@param negative_samples: float >= 0. 0 for no negative (=random) samples. 1 for same number as positive samples. etc.
@param categorical: bool. if False, labels will be integers (eg. [0, 1, 1 .. ]),
Parameters:
-----------
vocabulary_size: int. maximum possible word index + 1
window_size: int. actually half-window. The window of a word wi will be [i-window_size, i+window_size+1]
negative_samples: float >= 0. 0 for no negative (=random) samples. 1 for same number as positive samples. etc.
categorical: bool. if False, labels will be integers (eg. [0, 1, 1 .. ]),
if True labels will be categorical eg. [[1,0],[0,1],[0,1] .. ]
Note: by convention, index 0 in the vocabulary is a non-word and will be skipped.
Returns:
--------
couples, labels: where `couples` are int pairs and
`labels` are either 0 or 1.
Notes:
------
By convention, index 0 in the vocabulary is a non-word and will be skipped.
'''
couples = []
labels = []
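The expanded docstrings above document the preprocessing helpers; a short usage sketch of `pad_sequences` plus the Zipf-based sampling-table formula reproduced in NumPy (the toy sequences and `size` are illustrative):

```python
import numpy as np
from keras.preprocessing.sequence import pad_sequences

seqs = [[1, 2, 3], [4, 5], [6]]
pad_sequences(seqs, maxlen=4)
# array([[0, 1, 2, 3],
#        [0, 0, 4, 5],
#        [0, 0, 0, 6]], dtype=int32)
pad_sequences(seqs, maxlen=2, padding='post', truncating='pre')
# array([[2, 3],
#        [4, 5],
#        [6, 0]], dtype=int32)

# make_sampling_table's formula, written out directly:
size, sampling_factor, gamma = 10, 1e-5, 0.577
rank = np.array(list(range(size)))
rank[0] = 1
inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1. / (12. * rank)
sampling_table = np.minimum(1., sampling_factor * inv_fq / np.sqrt(sampling_factor * inv_fq))
```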
