Skip to content

Commit

Permalink
🐛 (git) conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
imgarylai committed Jul 25, 2018
1 parent a8336c3 commit 20aa578
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 51 deletions.
6 changes: 3 additions & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

# -- Project information -----------------------------------------------------

project = 'Elit'
copyright = '2018, Gary Lai'
author = 'Gary Lai'
project = 'ELIT'
copyright = '2018, Emory NLP'
author = 'Jinho D. Choi, Gary Lai'

# The short X.Y version
version = ''
Expand Down
2 changes: 1 addition & 1 deletion elit/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def save(self, model_path, **kwargs):
pickle.dump(self.conv2d_config, f)
pickle.dump(self.hidden_config, f)

self.model.save_parameters(gln(model_path))
# self.model.save_parameters(gln(model_path))

# override
def decode(self, input_data, batch_size=2048, **kwargs):
Expand Down
19 changes: 9 additions & 10 deletions elit/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(self, input_config, output_config, conv2d_config=None, hidden_confi
super().__init__(**kwargs)

def pool(c):
if c.pool is None: return lambda x: x
if c.pool is None: return None
p = mx.gluon.nn.MaxPool2D if c.pool == 'max' else mx.gluon.nn.AvgPool2D
n = input_config.maxlen - c.ngram + 1
return p(pool_size=(n, 1), strides=(n, 1))
Expand All @@ -60,20 +60,18 @@ def pool(c):
if self.conv2d:
for i, c in enumerate(self.conv2d, 1):
setattr(self, 'conv_'+str(i), c.conv)
setattr(self, 'conv_pool_'+str(i), c.pool)
setattr(self, 'conv_dropout_' + str(i), c.dropout)
if c.pool: setattr(self, 'conv_pool_'+str(i), c.pool)

if self.hidden:
for i, h in enumerate(self.hidden, 1):
setattr(self, 'hidden_' + str(i), h.dense)
setattr(self, 'hidden_dropout_' + str(i), h.dropout)

def forward(self, x):
"""
def conv(c):
return c.dropout(c.pool(c.conv(x))) if c.pool else c.dropout(c.conv(x).reshape((0, -1)))

:param x:
:return:
"""
# input layer
x = self.input_dropout(x)

Expand All @@ -85,7 +83,7 @@ def forward(self, x):
# conv: [(batches, filters, maxlen - ngram + 1, 1) for ngram in ngrams]
# pool: [(batches, filters, 1, 1) for ngram in ngrams]
# reshape: [(batches, filters * x * y) for ngram in ngrams]
t = [c.dropout(c.pool(c.conv(x))) for c in self.conv2d]
t = [conv(c) for c in self.conv2d]
x = nd.concat(*t, dim=1)

if self.hidden:
Expand Down Expand Up @@ -117,9 +115,10 @@ def conv2d_args(s):
"""
def create(config):
c = config.split(':')
return conv2d_namespace(ngram=int(c[0]), filters=int(c[1]), activation=c[2], pool=c[3], dropout=float(c[4]))
pool = c[3] if c[3].lower() != 'none' else None
return conv2d_namespace(ngram=int(c[0]), filters=int(c[1]), activation=c[2], pool=pool, dropout=float(c[4]))

return (create(config) for config in s.split(';')) if s != 'None' else None
return tuple(create(config) for config in s.split(';')) if s.lower() != 'none' else None


def hidden_namespace(dim, activation, dropout):
Expand All @@ -135,4 +134,4 @@ def create(config):
c = config.split(':')
return SimpleNamespace(dim=int(c[0]), activation=c[1], dropout=float(c[2]))

return (create(config) for config in s.split(';')) if s != 'None' else None
return tuple(create(config) for config in s.split(';')) if s.lower() != 'none' else None
40 changes: 20 additions & 20 deletions elit/pos.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,7 @@ def int_tuple(s):
help='dropout rate applied to the input layer')
parser.add_argument('-cc', '--conv2d_config', type=conv2d_args,
metavar='(ngram:filters:activation:pool:dropout)(;#1)*',
default=tuple(SimpleNamespace(ngram=i, filters=128, activation='relu',
pool='avg', dropout=0.2) for i in range(1, 5)),
default=tuple(SimpleNamespace(ngram=i, filters=128, activation='relu', pool=None, dropout=0.2) for i in range(1, 5)),
help='configuration for the convolution layer')
parser.add_argument('-hc', '--hidden_config', type=hidden_args, metavar='(dim:activation:dropout)(;#1)*', default=None,
help='configuration for the hidden layer')
Expand Down Expand Up @@ -198,25 +197,26 @@ def train():
def evaluate():
# cml arguments
args = train_args()

# vector space models
vsm_list = [FastText(args.word_vsm)]
if args.ambi_vsm: vsm_list.append(Word2Vec(args.ambi_vsm))

# component
comp = POSTagger(args.ctx, vsm_list)
comp.load(args.model_path)

# data
reader, reader_args = args.reader
dev_data = reader(args.dev_path, reader_args)

# decode
states = group_states(dev_data, comp.create_state)
e = comp._evaluate(states, reset=True)
print('DEV: %5.2f (%d/%d)' % (e.get(), e.correct, e.total))
print(args.hidden_config)

# # vector space models
# vsm_list = [FastText(args.word_vsm)]
# if args.ambi_vsm: vsm_list.append(Word2Vec(args.ambi_vsm))
#
# # component
# comp = POSTagger(args.ctx, vsm_list)
# comp.load(args.model_path)
#
# # data
# reader, reader_args = args.reader
# dev_data = reader(args.dev_path, reader_args)
#
# # decode
# states = group_states(dev_data, comp.create_state)
# e = comp._evaluate(states, reset=True)
# print('DEV: %5.2f (%d/%d)' % (e.get(), e.correct, e.total))


if __name__ == '__main__':
train()
evaluate()
# evaluate()
75 changes: 59 additions & 16 deletions elit/sentiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,50 +16,84 @@
from types import SimpleNamespace

import mxnet as mx
import numpy as np
from mxnet import gluon

from elit.component import NLPComponent
from elit.state import NLPState
from elit.structure import POS

__author__ = 'Jinho D. Choi'

class SentenceBasedSentimentState(NLPState):
def __init__(self, document, vsm, label_map, maxlen):

class SentenceClassificationState(NLPState):
def __init__(self, document, vsm, label_map, maxlen, key, key_out=None):
"""
POSState inherits the left-to-right one-pass (LR1P) decoding strategy from ForwardState.
SentenceClassificationState labels each sentence in the input document with a certain class
(e.g., positive or negative for sentiment analysis).
:param document: an input document.
:type document: elit.structure.Document
:param vsm: a vector space model for word embeddings.
:type vsm: elit.vsm.VectorSpaceModel
:param label_map: the mapping between class labels and their unique IDs.
:type label_map: elit.vsm.LabelMap
:param maxlen: the maximum length of each sentence.
:type maxlen: int
:param key: the key to each sentence in the input document where the inferred labels (self.labels) are saved.
:type key: str
:param key_out: the key to each sentence in the input document where the predicted outputs (self.outputs) are saved.
:type key_out: str
"""
super().__init__(document)
self.emb = vsm.document_matrix(document.tokens, maxlen)
self.embs = [vsm.document_matrix(sen.tokens, maxlen) for sen in document]
self.label_map = label_map
self.key = key
self.key_out = key_out if key_out else key + '-out'

def reset(self):
def process(self, outputs):
"""
Nothing to reset.
Saves the predicted outputs to self.outputs.
:param outputs: a matrix where each row contains the prediction scores for the corresponding sentence.
:type outputs: numpy.array
"""
pass
self.outputs = outputs

def process(self, output):
self.outputs
def finalize(self):
"""
Saves the predicted outputs (self.outputs) and the inferred labels (self.labels) to the input document once decoding is done.
"""
for i, labels in enumerate(self.labels):
d = self.document.get_sentence(i)
d[self.key] = labels
d[self.key_out] = self.outputs[i]

def eval(self, metric):
"""
:param metric: the accuracy metric.
:type metric: elit.util.Accuracy
"""
autos = self.labels
pass


def reset(self):
"""
Nothing to reset.
"""
pass

def has_next(self):
"""
No use for this class.
:return: False.
"""
return False




for i, sentence in enumerate(self.document):
gold = sentence[POS]
auto = autos[i]
metric.correct += len([1 for g, p in zip(gold, auto) if g == p])
metric.total += len(gold)

@property
def labels(self):
return [self.label_map.get(np.argmax(output)) for output in self.outputs]

@property
def x(self):
Expand All @@ -71,6 +105,10 @@ def x(self):
n = np.column_stack(l)
return n

@property
def y(self):
pass


class DocumentClassificationCNNModel(gluon.Block):
def __init__(self, input_config, output_config, conv2d_config, **kwargs):
Expand Down Expand Up @@ -98,6 +136,11 @@ def pool(c):

class SentimentAnalyzer(NLPComponent):
def __init__(self, ctx, vsm):
"""
:param ctx:
:type ctx: mx.
:param vsm:
"""
super().__init__(ctx)
self.vsm = vsm

Expand Down
2 changes: 1 addition & 1 deletion elit/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def has_next(self):

def finalize(self):
"""
Saves the predicted outputs (self.outputs) and the inferred labels (self.labels) to the input document after decoding.
Saves the predicted outputs (self.outputs) and the inferred labels (self.labels) to the input document once decoding is done.
"""
for i, labels in enumerate(self.labels):
d = self.document.get_sentence(i)
Expand Down

0 comments on commit 20aa578

Please sign in to comment.