Small updates to make code work with TensorFlow 1.4. Locally usin… #77

Open
wants to merge 2 commits into master
1 change: 1 addition & 0 deletions data_utils.py
@@ -144,3 +144,4 @@ def prepare_custom_data(working_directory, train_enc, train_dec, test_enc, test_
data_to_token_ids(test_dec, dec_dev_ids_path, dec_vocab_path, tokenizer)

return (enc_train_ids_path, dec_train_ids_path, enc_dev_ids_path, dec_dev_ids_path, enc_vocab_path, dec_vocab_path)

3 changes: 2 additions & 1 deletion neuralconvo.ini
@@ -4,7 +4,8 @@ mode = train
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
+
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
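
The fix itself is small: test_dec now points at the decoder-side file data/test.dec instead of repeating the encoder-side data/test.enc (the same correction is applied to seq2seq.ini and seq2seq_serve.ini below). For context, here is a minimal sketch of how such an .ini could be loaded with Python's standard configparser; the [strings] section name for the path keys is an assumption, since the diff only shows that an [ints] section follows:

# Hedged sketch, not the project's actual loader. "strings" is an assumed
# section name; only "[ints]" is visible in the diff above.
import configparser

def load_config(path="seq2seq.ini"):
    parser = configparser.ConfigParser()
    parser.read(path)
    conf = dict(parser.items("strings"))                        # train_enc, test_dec, ...
    conf.update({k: int(v) for k, v in parser.items("ints")})   # numeric settings
    return conf

cfg = load_config()
assert cfg["test_dec"] == "data/test.dec"   # the value this PR corrects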
3 changes: 2 additions & 1 deletion seq2seq.ini
@@ -4,7 +4,8 @@ mode = train
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
+
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
93 changes: 58 additions & 35 deletions seq2seq_model.py
@@ -19,23 +19,16 @@
from __future__ import division
from __future__ import print_function

import copy
import random

import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf

#from tensorflow.models.rnn.translate import data_utils
#fixes File "execute.py", line 31, in <module>
#import seq2seq_model
#File "C:\PYTHONCODE\Tensorflow\chatbot\tensorflow_chatbot\seq2seq_model.py", l
#ine 28, in <module>
#from tensorflow.models.rnn.translate import data_utils
#ModuleNotFoundError: No module named 'tensorflow.models'
import data_utils



class Seq2SeqModel(object):
"""Sequence-to-sequence model with attention and for multiple buckets.

@@ -51,10 +44,20 @@ class Seq2SeqModel(object):
http://arxiv.org/abs/1412.2007
"""

def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
num_layers, max_gradient_norm, batch_size, learning_rate,
learning_rate_decay_factor, use_lstm=False,
num_samples=512, forward_only=False):
def __init__(self,
source_vocab_size,
target_vocab_size,
buckets,
size,
num_layers,
max_gradient_norm,
batch_size,
learning_rate,
learning_rate_decay_factor,
use_lstm=False,
num_samples=512,
forward_only=False,
dtype=tf.float32):
"""Create the model.

Args:
@@ -76,12 +79,14 @@ def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
use_lstm: if true, we use LSTM cells instead of GRU cells.
num_samples: number of samples for sampled softmax.
forward_only: if set, we do not construct the backward pass in the model.
dtype: the data type to use to store internal variables.
"""
self.source_vocab_size = source_vocab_size
self.target_vocab_size = target_vocab_size
self.buckets = buckets
self.batch_size = batch_size
self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
self.learning_rate = tf.Variable(
float(learning_rate), trainable=False, dtype=dtype)
self.learning_rate_decay_op = self.learning_rate.assign(
self.learning_rate * learning_rate_decay_factor)
self.global_step = tf.Variable(0, trainable=False)
@@ -91,34 +96,52 @@ def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
softmax_loss_function = None
# Sampled softmax only makes sense if we sample less than vocabulary size.
if num_samples > 0 and num_samples < self.target_vocab_size:
w = tf.get_variable("proj_w", [size, self.target_vocab_size])
w_t = tf.transpose(w)
b = tf.get_variable("proj_b", [self.target_vocab_size])
w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], dtype=dtype)
w = tf.transpose(w_t)
b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
output_projection = (w, b)

def sampled_loss(inputs, labels):
def sampled_loss(labels, logits):
labels = tf.reshape(labels, [-1, 1])
return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
self.target_vocab_size)
# We need to compute the sampled_softmax_loss using 32bit floats to
# avoid numerical instabilities.
local_w_t = tf.cast(w_t, tf.float32)
local_b = tf.cast(b, tf.float32)
local_inputs = tf.cast(logits, tf.float32)
return tf.cast(
tf.nn.sampled_softmax_loss(
weights=local_w_t,
biases=local_b,
labels=labels,
inputs=local_inputs,
num_sampled=num_samples,
num_classes=self.target_vocab_size),
dtype)
softmax_loss_function = sampled_loss

# Create the internal multi-layer cell for our RNN.
single_cell = tf.nn.rnn_cell.GRUCell(size)
def single_cell():
return tf.contrib.rnn.GRUCell(size)
if use_lstm:
single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
cell = single_cell
def single_cell():
return tf.contrib.rnn.BasicLSTMCell(size)
cell = single_cell()
if num_layers > 1:
cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])

# The seq2seq function: we use embedding for the input and attention.
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
return tf.nn.seq2seq.embedding_attention_seq2seq(
encoder_inputs, decoder_inputs, cell,
num_encoder_symbols=source_vocab_size,
num_decoder_symbols=target_vocab_size,
embedding_size=size,
output_projection=output_projection,
feed_previous=do_decode)
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode=False):
tmp_cell = copy.deepcopy(cell) #new
return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
encoder_inputs,
decoder_inputs,
tmp_cell, #new
num_encoder_symbols=source_vocab_size,
num_decoder_symbols=target_vocab_size,
embedding_size=size,
output_projection=output_projection,
feed_previous=do_decode,
dtype=dtype)

# Feeds for inputs.
self.encoder_inputs = []
@@ -130,7 +153,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
for i in xrange(buckets[-1][1] + 1):
self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
name="decoder{0}".format(i)))
self.target_weights.append(tf.placeholder(tf.float32, shape=[None],
self.target_weights.append(tf.placeholder(dtype, shape=[None],
name="weight{0}".format(i)))

# Our targets are decoder inputs shifted by one.
@@ -139,7 +162,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):

# Training outputs and losses.
if forward_only:
self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
softmax_loss_function=softmax_loss_function)
@@ -151,7 +174,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
for output in self.outputs[b]
]
else:
self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets,
lambda x, y: seq2seq_f(x, y, False),
@@ -171,7 +194,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
self.updates.append(opt.apply_gradients(
zip(clipped_gradients, params), global_step=self.global_step))

self.saver = tf.train.Saver(tf.all_variables())
self.saver = tf.train.Saver(tf.global_variables())

def step(self, session, encoder_inputs, decoder_inputs, target_weights,
bucket_id, forward_only):
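
The most involved change above is the sampled-softmax block: the projection is now stored transposed (proj_w has shape [target_vocab_size, size]), the loss callback takes (labels, logits), and the inputs are cast to float32 before calling tf.nn.sampled_softmax_loss with keyword arguments, matching the TF 1.x signature in which weights are [num_classes, dim] and labels precede inputs. A standalone sketch with toy sizes (the numbers are illustrative, not the project's settings):

# Toy-sized sketch of the TF 1.x sampled_softmax_loss call used in the diff.
import tensorflow as tf

vocab_size, hidden_size, batch, num_sampled = 1000, 64, 8, 32

w_t = tf.get_variable("proj_w", [vocab_size, hidden_size])  # [num_classes, dim]
b = tf.get_variable("proj_b", [vocab_size])
decoder_out = tf.random_normal([batch, hidden_size])        # pre-projection decoder output
labels = tf.random_uniform([batch, 1], maxval=vocab_size, dtype=tf.int64)

loss = tf.nn.sampled_softmax_loss(
    weights=w_t,             # transposed relative to the output projection (w, b)
    biases=b,
    labels=labels,           # shape [batch, num_true]
    inputs=decoder_out,      # shape [batch, dim]
    num_sampled=num_sampled,
    num_classes=vocab_size)  # one loss value per example, shape [batch]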
2 changes: 1 addition & 1 deletion seq2seq_serve.ini
@@ -4,7 +4,7 @@ mode = serve
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
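
Back in seq2seq_model.py, the remaining edits track TF 1.x module moves: tf.nn.rnn_cell becomes tf.contrib.rnn, tf.nn.seq2seq becomes tf.contrib.legacy_seq2seq, tf.all_variables() becomes tf.global_variables(), and MultiRNNCell is now built from a fresh cell object per layer rather than the same instance repeated in a list. A minimal sketch of the cell construction under those assumptions (toy sizes, illustrative names):

# Hedged sketch of building the stacked RNN cell the TF 1.x way:
# one new cell instance per layer instead of [single_cell] * num_layers.
import tensorflow as tf

size, num_layers, use_lstm = 64, 2, False

def single_cell():
    # return a fresh cell on every call so layers do not share one object
    if use_lstm:
        return tf.contrib.rnn.BasicLSTMCell(size)
    return tf.contrib.rnn.GRUCell(size)

cell = single_cell()
if num_layers > 1:
    cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])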