From ad941855f33788321f9ea9271540a58f83f0b6ac Mon Sep 17 00:00:00 2001 From: linxihui Date: Wed, 25 Jan 2017 01:22:39 -0500 Subject: [PATCH] fixed PEP8 --- examples/conll2000_chunking_crf.py | 6 ++++-- keras/layers/crf.py | 28 ++++++++++++++-------------- tests/keras/layers/test_crf.py | 12 ++++++------ 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/examples/conll2000_chunking_crf.py b/examples/conll2000_chunking_crf.py index ea1d29fc90ba..2d9b150dc09e 100644 --- a/examples/conll2000_chunking_crf.py +++ b/examples/conll2000_chunking_crf.py @@ -33,6 +33,7 @@ vocab = ['', ''] + [w for w, f in word_counts.iteritems() if f >= 3] word2idx = dict((w, i) for i, w in enumerate(vocab)) + def process_data(data, maxlen=None, onehot=False): if maxlen is None: maxlen = max(len(s) for s in data) @@ -46,6 +47,7 @@ def process_data(data, maxlen=None, onehot=False): else: return x, numpy.expand_dims(y, 2) + train_x, train_y = process_data(train) test_x, test_y = process_data(test) @@ -56,7 +58,7 @@ def process_data(data, maxlen=None, onehot=False): print('==== training CRF ====') model = Sequential() -model.add(Embedding(len(vocab), 200, mask_zero=True)) # Random embedding +model.add(Embedding(len(vocab), 200, mask_zero=True)) # Random embedding crf = CRF(len(class_labels), sparse_target=True) model.add(crf) model.summary() @@ -77,7 +79,7 @@ def process_data(data, maxlen=None, onehot=False): print('==== training BiLSTM-CRF ====') model = Sequential() -model.add(Embedding(len(vocab), 200, mask_zero=True)) # Random embedding +model.add(Embedding(len(vocab), 200, mask_zero=True)) # Random embedding model.add(Bidirectional(LSTM(100, return_sequences=True))) crf = CRF(len(class_labels), sparse_target=True) model.add(crf) diff --git a/keras/layers/crf.py b/keras/layers/crf.py index efde694c93ad..a6e129423689 100644 --- a/keras/layers/crf.py +++ b/keras/layers/crf.py @@ -45,15 +45,15 @@ class CRF(Layer): model = Sequential() model.add(Embedding(3001, 300, mask_zero=True)(X) - # use learn_mode = 'join', test_mode = 'viterbi' - crf = CRF(10) - model.add(crf(Embed)) + # use learn_mode = 'join', test_mode = 'viterbi', sparse_target = True (label indice output) + crf = CRF(10, sparse_target=True) + model.add(crf) # crf.accuracy is default to Viterbi acc if using join-mode (default). # One can add crf.marginal_acc if interested, but may slow down learning model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy]) - # y can be either onehot representation or label indices (with shape 1 at dim 3) + # y must be label indices (with shape 1 at dim 3) here, since `sparse_target=True` model.fit(x, y) # prediction give onehot representation of Viterbi best path @@ -331,16 +331,16 @@ def get_logZ(self, in_energy, mask): def get_energy(self, y_true, in_energy, mask): '''Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3 ''' - in_energy = K.sum(in_energy * y_true, 2) # (B, T) + in_energy = K.sum(in_energy * y_true, 2) # (B, T) chain_energy = K.sum(K.dot(y_true[:, :-1, :], self.U) * y_true[:, 1:, :], 2) # (B, T-1) chain_energy = self.chain_activation(chain_energy) if mask is not None: mask = K.cast(mask, K.floatx()) - chain_mask = mask[:, :-1] * mask[:, 1:] # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding + chain_mask = mask[:, :-1] * mask[:, 1:] # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding in_energy = in_energy * mask chain_energy = chain_energy * chain_mask - total_energy = K.sum(in_energy, -1) + K.sum(chain_energy, -1) # (B, ) + total_energy = K.sum(in_energy, -1) + K.sum(chain_energy, -1) # (B, ) return total_energy @@ -368,19 +368,19 @@ def step(self, in_energy_t, states, return_logZ=True): t = K.cast(i[0, 0], dtype='int32') if len(states) > 3: if K._BACKEND == 'theano': - m = states[3][:, t:(t+2)] + m = states[3][:, t:(t + 2)] else: m = tf.slice(states[3], [0, t], [-1, 2]) in_energy_t = in_energy_t * K.expand_dims(m[:, 0]) chain_energy = chain_energy * K.expand_dims(K.expand_dims(m[:, 0] * m[:, 1])) # (1, F, F)*(B, 1, 1) -> (B, F, F) if return_logZ: - energy = chain_energy + K.expand_dims(in_energy_t - prev_target_val, 2) # shapes: (1, B, F) + (B, F, 1) -> (B, F, F) - new_target_val = self.log_sum_exp(-energy, 1) # shapes: (B, F) + energy = chain_energy + K.expand_dims(in_energy_t - prev_target_val, 2) # shapes: (1, B, F) + (B, F, 1) -> (B, F, F) + new_target_val = self.log_sum_exp(-energy, 1) # shapes: (B, F) return new_target_val, [new_target_val, i + 1] else: energy = chain_energy + K.expand_dims(in_energy_t + prev_target_val, 2) min_energy = K.min(energy, 1) - argmin_table = K.cast(K.argmin(energy, 1), K.floatx()) # cast for tf-version `K.rnn` + argmin_table = K.cast(K.argmin(energy, 1), K.floatx()) # cast for tf-version `K.rnn` return argmin_table, [min_energy, i + 1] def recursion(self, in_energy, mask=None, go_backwards=False, return_sequences=True, return_logZ=True): @@ -403,8 +403,8 @@ def recursion(self, in_energy, mask=None, go_backwards=False, return_sequences=T If `return_logZ = False`, compute the Viterbi's best path lookup table. ''' chain_energy = self.chain_activation(self.U) - chain_energy = K.expand_dims(chain_energy, 0) # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t - prev_target_val = K.zeros_like(in_energy[:, 0, :]) # shape=(B, F), dtype=float32 + chain_energy = K.expand_dims(chain_energy, 0) # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t + prev_target_val = K.zeros_like(in_energy[:, 0, :]) # shape=(B, F), dtype=float32 if go_backwards: in_energy = K.reverse(in_energy, 1) @@ -458,7 +458,7 @@ def viterbi_decoding(self, X, mask=None): # backward to find best path, `initial_best_idx` can be any, as all elements in the last argmin_table are the same argmin_tables = K.reverse(argmin_tables, 1) - initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])] # matrix instead of vector is required by tf `K.rnn` + initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])] # matrix instead of vector is required by tf `K.rnn` def gather_each_row(params, indices): n = K.shape(indices)[0] diff --git a/tests/keras/layers/test_crf.py b/tests/keras/layers/test_crf.py index 0bbe66def908..46fc7a4d5cf2 100644 --- a/tests/keras/layers/test_crf.py +++ b/tests/keras/layers/test_crf.py @@ -12,11 +12,11 @@ def test_CRF(): # data x = np.random.randint(1, embedding_num, nb_samples * timesteps).reshape((nb_samples, timesteps)) - x[0, -4:] = 0 # right padding - x[1, :5] = 0 # left padding + x[0, -4:] = 0 # right padding + x[1, :5] = 0 # left padding y = np.random.randint(0, output_dim, nb_samples * timesteps).reshape((nb_samples, timesteps)) y_onehot = np.eye(output_dim)[y] - y = np.expand_dims(y, 2) # .astype('float32') + y = np.expand_dims(y, 2) # .astype('float32') # test with no masking, onehot, fix length model = Sequential() @@ -37,8 +37,8 @@ def test_CRF(): # check mask y_pred = model.predict(x).argmax(-1) - assert (y_pred[0, -4:] == 0).all() # right padding - assert (y_pred[1, :5] == 0).all() # left padding + assert (y_pred[0, -4:] == 0).all() # right padding + assert (y_pred[1, :5] == 0).all() # left padding # test `viterbi_acc _, v_acc, _ = model.evaluate(x, y) @@ -48,7 +48,7 @@ def test_CRF(): # test config model.get_config() model = model_from_json(model.to_json()) - + # test marginal learn mode, fix length, unroll model = Sequential()