Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Another implementation of CRF #4646

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,6 @@ Demonstrates how to build a variational autoencoder.

[variational_autoencoder_deconv.py](variational_autoencoder_deconv.py)
Demonstrates how to build a variational autoencoder with Keras using deconvolution layers.

[conll2000_chunking_crf.py](conll2000_chunking_crf.py)
Demonstrates how to use a CRF layer (alone and on top of a BiLSTM) for sequence labeling on the CoNLL-2000 chunking corpus.
95 changes: 95 additions & 0 deletions examples/conll2000_chunking_crf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
'''Train CRF and BiLSTM-CRF on CONLL2000 chunking data, similar to https://arxiv.org/pdf/1508.01991v1.pdf.

For CRF, we get ~0.93 after 10 epochs
For BiLSTM-CRF, we get ~0.94 after 3 epochs
'''

from __future__ import print_function
import numpy
from nltk.corpus import conll2000
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM
from keras.preprocessing.sequence import pad_sequences
from keras.layers import CRF
from collections import Counter
from sklearn.metrics import classification_report

# Fix the seed so the shuffled train/test split below is reproducible.
numpy.random.seed(1223)

# conll2000 is loaded lazily; a missing corpus surfaces as LookupError on first
# use.  (hasattr() only hid that error on Python 2, where it swallows every
# exception.)  Download the corpus on demand instead.
try:
    conll2000.ensure_loaded()
except LookupError:
    import nltk
    nltk.download('conll2000')

# NOTE(review): tagged_sents() appears to yield (word, POS-tag) pairs, so the
# labels learned below would be POS tags; chunk tags would come from
# iob_sents().  Confirm which one this "chunking" example intends.
tagged_sents = list(conll2000.tagged_sents())
class_labels = sorted(set(w[1] for s in tagged_sents for w in s))

# Random 80/20 split over sentences.
N = len(tagged_sents)
train_nb = int(N * 0.8)  # must be an int: it is used as a slice bound below
idx = numpy.random.choice(numpy.arange(N), N, replace=False)
train = [tagged_sents[i] for i in idx[:train_nb]]
test = [tagged_sents[i] for i in idx[train_nb:]]

# Vocabulary: lower-cased training words seen at least 3 times.
# Index 0 is reserved for padding and index 1 for out-of-vocabulary words.
word_counts = Counter(w[0].lower() for s in train for w in s)
vocab = ['<pad>', '<unk>'] + [w for w, f in word_counts.items() if f >= 3]
word2idx = {w: i for i, w in enumerate(vocab)}


def process_data(data, maxlen=None, onehot=False):
    """Convert tagged sentences into padded index arrays.

    # Arguments
        data: list of sentences, each a sequence of items whose element 0 is
            the word and element 1 is the label.
        maxlen: length every sentence is padded to; defaults to the length
            of the longest sentence in `data`.
        onehot: if True, labels are returned one-hot encoded over
            `class_labels`; otherwise as integer indices with a trailing
            axis of size 1.

    # Returns
        Tuple `(x, y)`. `x` holds word indices looked up in `word2idx`
        (lower-cased; unknown words map to index 1, i.e. '<unk>').
        `y` holds label indices into `class_labels`, either one-hot
        (float32) or expanded with a last axis of size 1.

    # Raises
        ValueError: if `data` is empty and `maxlen` is not given.
        KeyError: if a label is not present in `class_labels`.
    """
    if maxlen is None:
        maxlen = max(len(s) for s in data)

    # Build the label lookup once instead of a linear list.index() per token.
    label2idx = {label: i for i, label in enumerate(class_labels)}

    x = pad_sequences([[word2idx.get(w[0].lower(), 1) for w in s] for s in data],
                      maxlen=maxlen)
    y = pad_sequences([[label2idx[w[1]] for w in s] for s in data],
                      maxlen=maxlen)

    if onehot:
        # Indexing the identity matrix with y turns each index into a one-hot row.
        y = numpy.eye(len(class_labels), dtype='float32')[y]
        return x, y
    return x, numpy.expand_dims(y, 2)


# Build padded index arrays for both splits.  With the default onehot=False the
# targets are integer label indices with a trailing axis of size 1.
# Each split is padded to the length of its own longest sentence (maxlen=None).
train_x, train_y = process_data(train)
test_x, test_y = process_data(test)

# --------------
# 1. Regular CRF
# --------------
# A CRF layer applied directly to randomly initialised word embeddings.

print('==== training CRF ====')

model = Sequential()
# mask_zero=True: index 0 is the '<pad>' entry of vocab.
model.add(Embedding(len(vocab), 200, mask_zero=True))  # Random embedding
# sparse_target=True matches the integer (non one-hot) targets from process_data.
crf = CRF(len(class_labels), sparse_target=True)
model.add(crf)
model.summary()

# The loss and the accuracy metric are both taken from the CRF layer instance.
model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
model.fit(train_x, train_y, nb_epoch=10, validation_data=[test_x, test_y])

# Take the argmax over the last axis, then keep only positions whose word index
# is non-zero so padding is excluded from the report.
test_y_pred = model.predict(test_x).argmax(-1)[test_x > 0]
test_y_true = test_y[test_x > 0]

print('\n---- Result of CRF ----\n')
print(classification_report(test_y_true, test_y_pred, target_names=class_labels))

# -------------
# 2. BiLSTM-CRF
# -------------
# Same pipeline as above, with a bidirectional LSTM between the embedding and
# the CRF layer.

print('==== training BiLSTM-CRF ====')

model = Sequential()
# mask_zero=True: index 0 is the '<pad>' entry of vocab.
model.add(Embedding(len(vocab), 200, mask_zero=True))  # Random embedding
# return_sequences=True so the CRF receives one vector per token.
model.add(Bidirectional(LSTM(100, return_sequences=True)))
# sparse_target=True matches the integer (non one-hot) targets from process_data.
crf = CRF(len(class_labels), sparse_target=True)
model.add(crf)
model.summary()

model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
# NOTE(review): the module docstring quotes a score "after 3 epochs" for
# BiLSTM-CRF, but this trains for 10 -- confirm which is intended.
model.fit(train_x, train_y, nb_epoch=10, validation_data=[test_x, test_y])

# Take the argmax over the last axis, then keep only positions whose word index
# is non-zero so padding is excluded from the report.
test_y_pred = model.predict(test_x).argmax(-1)[test_x > 0]
test_y_true = test_y[test_x > 0]

print('\n---- Result of BiLSTM-CRF ----\n')
print(classification_report(test_y_true, test_y_pred, target_names=class_labels))
1 change: 1 addition & 0 deletions keras/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
from .advanced_activations import *
from .wrappers import *
from .convolutional_recurrent import *
from .crf import CRF
Loading