From fb50917ad304d0fb56ed7ed2bf3ce617d978308b Mon Sep 17 00:00:00 2001
From: Pierros Skafidas <p.c.skafidas@gmail.com>
Date: Wed, 5 May 2021 14:02:24 +0300
Subject: [PATCH] Migrate TensorFlow 1.x API calls to tf.compat.v1 / TF 2.x equivalents

---
 TSP/tsp_utils.py         |  8 +++---
 VRP/vrp_attention.py     | 40 +++++++++++++-------------
 VRP/vrp_utils.py         | 12 ++++----
 main.py                  | 10 ++++---
 misc_utils.py            | 34 +++++++++++-----------
 model/attention_agent.py | 62 ++++++++++++++++++++--------------------
 shared/attention.py      | 24 ++++++++--------
 shared/decode_step.py    |  8 +++---
 shared/embeddings.py     | 10 +++----
 shared/misc_utils.py     | 16 +++++------
 10 files changed, 113 insertions(+), 111 deletions(-)

diff --git a/TSP/tsp_utils.py b/TSP/tsp_utils.py
index 73ef939..925e0c3 100644
--- a/TSP/tsp_utils.py
+++ b/TSP/tsp_utils.py
@@ -120,10 +120,10 @@ def __init__(self,
 
         self.n_nodes = args['n_nodes']
         self.input_dim = args['input_dim']
-        self.input_data = tf.placeholder(tf.float32,\
+        self.input_data = tf.compat.v1.placeholder(tf.float32,\
             shape=[None,self.n_nodes,args['input_dim']])
         self.input_pnt = self.input_data
-        self.batch_size = tf.shape(self.input_data)[0] 
+        self.batch_size = tf.shape(input=self.input_data)[0] 
 
     def reset(self,beam_width=1):
         '''
@@ -198,6 +198,6 @@ def reward_func(sample_solution=None):
     # get the reward based on the route lengths
 
 
-    route_lens_decoded = tf.reduce_sum(tf.pow(tf.reduce_sum(tf.pow(\
-        (sample_solution_tilted - sample_solution) ,2), 2) , .5), 0)
+    route_lens_decoded = tf.reduce_sum(input_tensor=tf.pow(tf.reduce_sum(input_tensor=tf.pow(\
+        (sample_solution_tilted - sample_solution) ,2), axis=2) , .5), axis=0)
     return route_lens_decoded 
\ No newline at end of file
diff --git a/VRP/vrp_attention.py b/VRP/vrp_attention.py
index d80341d..079c23a 100644
--- a/VRP/vrp_attention.py
+++ b/VRP/vrp_attention.py
@@ -6,19 +6,19 @@ def __init__(self, dim, use_tanh=False, C=10,_name='Attention',_scope=''):
         self.use_tanh = use_tanh
         self._scope = _scope
 
-        with tf.variable_scope(_scope+_name):
+        with tf.compat.v1.variable_scope(_scope+_name):
             # self.v: is a variable with shape [1 x dim]
-            self.v = tf.get_variable('v',[1,dim],
-                       initializer=tf.contrib.layers.xavier_initializer())
+            self.v = tf.compat.v1.get_variable('v',[1,dim],
+                       initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"))
             self.v = tf.expand_dims(self.v,2)
             
-        self.emb_d = tf.layers.Conv1D(dim,1,_scope=_scope+_name+'/emb_d' ) #conv1d
-        self.emb_ld = tf.layers.Conv1D(dim,1,_scope=_scope+_name+'/emb_ld' ) #conv1d_2
+        self.emb_d = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name+'/emb_d' ) #conv1d
+        self.emb_ld = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name+'/emb_ld' ) #conv1d_2
 
-        self.project_d = tf.layers.Conv1D(dim,1,_scope=_scope+_name+'/proj_d' ) #conv1d_1
-        self.project_ld = tf.layers.Conv1D(dim,1,_scope=_scope+_name+'/proj_ld' ) #conv1d_3
-        self.project_query = tf.layers.Dense(dim,_scope=_scope+_name+'/proj_q' ) #
-        self.project_ref = tf.layers.Conv1D(dim,1,_scope=_scope+_name+'/proj_ref' ) #conv1d_4
+        self.project_d = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name+'/proj_d' ) #conv1d_1
+        self.project_ld = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name+'/proj_ld' ) #conv1d_3
+        self.project_query = tf.compat.v1.layers.Dense(dim,_scope=_scope+_name+'/proj_q' ) #
+        self.project_ref = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name+'/proj_ref' ) #conv1d_4
 
 
         self.C = C  # tanh exploration parameter
@@ -40,7 +40,7 @@ def __call__(self, query, ref, env):
         # get the current demand and load values from environment
         demand = env.demand
         load = env.load
-        max_time = tf.shape(demand)[1]
+        max_time = tf.shape(input=demand)[1]
 
         # embed demand and project it
         # emb_d:[batch_size x max_time x dim ]
@@ -61,7 +61,7 @@ def __call__(self, query, ref, env):
         expanded_q = tf.tile(tf.expand_dims(q,1),[1,max_time,1])
 
         # v_view:[batch_size x dim x 1]
-        v_view = tf.tile( self.v, [tf.shape(e)[0],1,1]) 
+        v_view = tf.tile( self.v, [tf.shape(input=e)[0],1,1]) 
         
         # u : [batch_size x max_time x dim] * [batch_size x dim x 1] = 
         #       [batch_size x max_time]
@@ -82,17 +82,17 @@ def __init__(self, dim, use_tanh=False, C=10,_name='Attention',_scope=''):
         self.use_tanh = use_tanh
         self._scope = _scope
 
-        with tf.variable_scope(_scope+_name):
+        with tf.compat.v1.variable_scope(_scope+_name):
             # self.v: is a variable with shape [1 x dim]
-            self.v = tf.get_variable('v',[1,dim],
-                       initializer=tf.contrib.layers.xavier_initializer())
+            self.v = tf.compat.v1.get_variable('v',[1,dim],
+                       initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"))
             self.v = tf.expand_dims(self.v,2)
             
-        self.emb_d = tf.layers.Conv1D(dim,1,_scope=_scope+_name +'/emb_d') #conv1d
-        self.project_d = tf.layers.Conv1D(dim,1,_scope=_scope+_name +'/proj_d') #conv1d_1
+        self.emb_d = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name +'/emb_d') #conv1d
+        self.project_d = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name +'/proj_d') #conv1d_1
         
-        self.project_query = tf.layers.Dense(dim,_scope=_scope+_name +'/proj_q') #
-        self.project_ref = tf.layers.Conv1D(dim,1,_scope=_scope+_name +'/proj_e') #conv1d_2
+        self.project_query = tf.compat.v1.layers.Dense(dim,_scope=_scope+_name +'/proj_q') #
+        self.project_ref = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name +'/proj_e') #conv1d_2
 
         self.C = C  # tanh exploration parameter
         self.tanh = tf.nn.tanh
@@ -119,7 +119,7 @@ def __call__(self, query, ref, env):
         """
         # we need the first demand value for the critic
         demand = env.input_data[:,:,-1]
-        max_time = tf.shape(demand)[1]
+        max_time = tf.shape(input=demand)[1]
 
         # embed demand and project it
         # emb_d:[batch_size x max_time x dim ]
@@ -134,7 +134,7 @@ def __call__(self, query, ref, env):
         expanded_q = tf.tile(tf.expand_dims(q,1),[1,max_time,1])
 
         # v_view:[batch_size x dim x 1]
-        v_view = tf.tile( self.v, [tf.shape(e)[0],1,1]) 
+        v_view = tf.tile( self.v, [tf.shape(input=e)[0],1,1]) 
         
         # u : [batch_size x max_time x dim] * [batch_size x dim x 1] = 
         #       [batch_size x max_time]
diff --git a/VRP/vrp_utils.py b/VRP/vrp_utils.py
index 98ebdcf..8ce9c82 100644
--- a/VRP/vrp_utils.py
+++ b/VRP/vrp_utils.py
@@ -144,12 +144,12 @@ def __init__(self,
         self.n_nodes = args['n_nodes']
         self.n_cust = args['n_cust']
         self.input_dim = args['input_dim']
-        self.input_data = tf.placeholder(tf.float32,\
+        self.input_data = tf.compat.v1.placeholder(tf.float32,\
             shape=[None,self.n_nodes,self.input_dim])
 
         self.input_pnt = self.input_data[:,:,:2]
         self.demand = self.input_data[:,:,-1]
-        self.batch_size = tf.shape(self.input_pnt)[0] 
+        self.batch_size = tf.shape(input=self.input_pnt)[0] 
         
     def reset(self,beam_width=1):
         '''
@@ -220,7 +220,7 @@ def step(self,
         d_sat = tf.minimum(tf.gather_nd(self.demand,batched_idx), self.load)
 
         # update the demand
-        d_scatter = tf.scatter_nd(batched_idx, d_sat, tf.cast(tf.shape(self.demand),tf.int64))
+        d_scatter = tf.scatter_nd(batched_idx, d_sat, tf.cast(tf.shape(input=self.demand),tf.int64))
         self.demand = tf.subtract(self.demand, d_scatter)
 
         # update load
@@ -239,7 +239,7 @@ def step(self,
 
         self.mask += tf.concat( [tf.tile(tf.expand_dims(tf.cast(tf.equal(self.load,0),
             tf.float32),1), [1,self.n_cust]),                      
-            tf.expand_dims(tf.multiply(tf.cast(tf.greater(tf.reduce_sum(self.demand,1),0),tf.float32),
+            tf.expand_dims(tf.multiply(tf.cast(tf.greater(tf.reduce_sum(input_tensor=self.demand,axis=1),0),tf.float32),
                              tf.squeeze( tf.cast(tf.equal(idx,self.n_cust),tf.float32))),1)],1)
 
         state = State(load=self.load,
@@ -283,7 +283,7 @@ def reward_func(sample_solution):
     # get the reward based on the route lengths
 
 
-    route_lens_decoded = tf.reduce_sum(tf.pow(tf.reduce_sum(tf.pow(\
-        (sample_solution_tilted - sample_solution) ,2), 2) , .5), 0)
+    route_lens_decoded = tf.reduce_sum(input_tensor=tf.pow(tf.reduce_sum(input_tensor=tf.pow(\
+        (sample_solution_tilted - sample_solution) ,2), axis=2) , .5), axis=0)
     return route_lens_decoded 
 
diff --git a/main.py b/main.py
index c0696c1..bc73542 100644
--- a/main.py
+++ b/main.py
@@ -10,6 +10,8 @@
 from shared.decode_step import RNNDecodeStep
 from model.attention_agent import RLAgent
 
+tf.compat.v1.disable_eager_execution()
+
 def load_task_specific_components(task):
     '''
     This function load task-specific libraries
@@ -36,9 +38,9 @@ def load_task_specific_components(task):
     return DataGenerator, Env, reward_func, AttentionActor, AttentionCritic
 
 def main(args, prt):
-    config = tf.ConfigProto()
+    config = tf.compat.v1.ConfigProto()
     config.gpu_options.allow_growth = True
-    sess = tf.Session(config=config)
+    sess = tf.compat.v1.Session(config=config)
 
     # load task specific classes
     DataGenerator, Env, reward_func, AttentionActor, AttentionCritic = \
@@ -97,7 +99,7 @@ def main(args, prt):
     if random_seed is not None and random_seed > 0:
         prt.print_out("# Set random seed to %d" % random_seed)
         np.random.seed(random_seed)
-        tf.set_random_seed(random_seed)
-    tf.reset_default_graph()
+        tf.compat.v1.set_random_seed(random_seed)
+    tf.compat.v1.reset_default_graph()
 
     main(args, prt)
diff --git a/misc_utils.py b/misc_utils.py
index 733cd9a..13d4616 100644
--- a/misc_utils.py
+++ b/misc_utils.py
@@ -91,11 +91,11 @@ class Logger(object):
     
     def __init__(self, log_dir):
         """Create a summary writer logging to log_dir."""
-        self.writer = tf.summary.FileWriter(log_dir)
+        self.writer = tf.compat.v1.summary.FileWriter(log_dir)
 
     def scalar_summary(self, tag, value, step):
         """Log a scalar variable."""
-        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
+        summary = tf.compat.v1.Summary(value=[tf.compat.v1.Summary.Value(tag=tag, simple_value=value)])
         self.writer.add_summary(summary, step)
 
     def image_summary(self, tag, images, step):
@@ -111,14 +111,14 @@ def image_summary(self, tag, images, step):
             scipy.misc.toimage(img).save(s, format="png")
 
             # Create an Image object
-            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
+            img_sum = tf.compat.v1.Summary.Image(encoded_image_string=s.getvalue(),
                                        height=img.shape[0],
                                        width=img.shape[1])
             # Create a Summary value
-            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
+            img_summaries.append(tf.compat.v1.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
 
         # Create and write Summary
-        summary = tf.Summary(value=img_summaries)
+        summary = tf.compat.v1.Summary(value=img_summaries)
         self.writer.add_summary(summary, step)
         
     def histo_summary(self, tag, values, step, bins=1000):
@@ -128,7 +128,7 @@ def histo_summary(self, tag, values, step, bins=1000):
         counts, bin_edges = np.histogram(values, bins=bins)
 
         # Fill the fields of the histogram proto
-        hist = tf.HistogramProto()
+        hist = tf.compat.v1.HistogramProto()
         hist.min = float(np.min(values))
         hist.max = float(np.max(values))
         hist.num = int(np.prod(values.shape))
@@ -145,7 +145,7 @@ def histo_summary(self, tag, values, step, bins=1000):
             hist.bucket.append(c)
 
         # Create and write Summary
-        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
+        summary = tf.compat.v1.Summary(value=[tf.compat.v1.Summary.Value(tag=tag, histo=hist)])
         self.writer.add_summary(summary, step)
         self.writer.flush()
 
@@ -158,12 +158,12 @@ def _single_cell(unit_type, num_units, forget_bias, dropout, prt,
     # Cell Type
     if unit_type == "lstm":
         prt.print_out("  LSTM, forget_bias=%g" % forget_bias, new_line=False)
-        single_cell = tf.contrib.rnn.BasicLSTMCell(
+        single_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(
                 num_units,
                 forget_bias=forget_bias)
     elif unit_type == "gru":
         prt.print_out("  GRU", new_line=False)
-        single_cell = tf.contrib.rnn.GRUCell(num_units)
+        single_cell = tf.compat.v1.nn.rnn_cell.GRUCell(num_units)
     else:
         raise ValueError("Unknown unit type %s!" % unit_type)
 
@@ -250,15 +250,15 @@ def create_rnn_cell(unit_type, num_units, num_layers, num_residual_layers,
     if len(cell_list) == 1:  # Single layer.
         return cell_list[0]
     else:  # Multi layers
-        return tf.contrib.rnn.MultiRNNCell(cell_list)
+        return tf.compat.v1.nn.rnn_cell.MultiRNNCell(cell_list)
 
 def gradient_clip(gradients, params, max_gradient_norm):
     """Clipping gradients of a model."""
     clipped_gradients, gradient_norm = tf.clip_by_global_norm(
             gradients, max_gradient_norm)
-    gradient_norm_summary = [tf.summary.scalar("grad_norm", gradient_norm)]
+    gradient_norm_summary = [tf.compat.v1.summary.scalar("grad_norm", gradient_norm)]
     gradient_norm_summary.append(
-            tf.summary.scalar("clipped_gradient", tf.global_norm(clipped_gradients)))
+            tf.compat.v1.summary.scalar("clipped_gradient", tf.linalg.global_norm(clipped_gradients)))
 
     return clipped_gradients, gradient_norm_summary
 
@@ -274,7 +274,7 @@ def create_or_load_model(model, model_dir, session, out_dir, name):
     else:
         utils.print_out("  created %s model with fresh parameters, time %.2fs." %
                                         (name, time.time() - start_time))
-        session.run(tf.global_variables_initializer())
+        session.run(tf.compat.v1.global_variables_initializer())
 
     global_step = model.global_step.eval(session=session)
     return model, global_step
@@ -290,14 +290,14 @@ def add_summary(summary_writer, global_step, tag, value):
     """Add a new summary to the current summary_writer.
     Useful to log things that are not part of the training graph, e.g., tag=BLEU.
     """
-    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
+    summary = tf.compat.v1.Summary(value=[tf.compat.v1.Summary.Value(tag=tag, simple_value=value)])
     summary_writer.add_summary(summary, global_step)
 
 
 def get_config_proto(log_device_placement=False, allow_soft_placement=True):
     # GPU options:
     # https://www.tensorflow.org/versions/r0.10/how_tos/using_gpu/index.html
-    config_proto = tf.ConfigProto(
+    config_proto = tf.compat.v1.ConfigProto(
             log_device_placement=log_device_placement,
             allow_soft_placement=allow_soft_placement)
     config_proto.gpu_options.allow_growth = True
@@ -311,7 +311,7 @@ def debug_tensor(s, msg=None, summarize=10):
     """Print the shape and value of a tensor at test time. Return a new tensor."""
     if not msg:
         msg = s.name
-    return tf.Print(s, [tf.shape(s), s], msg + " ", summarize=summarize)
+    return tf.compat.v1.Print(s, [tf.shape(input=s), s], msg + " ", summarize=summarize)
 
 def tf_print(tensor, transform=None):
 
@@ -321,7 +321,7 @@ def print_tensor(x):
         # but adding a transformation of some kind usually makes the output more digestible
         print(x if transform is None else transform(x))
         return x
-    log_op = tf.py_func(print_tensor, [tensor], [tensor.dtype])[0]
+    log_op = tf.compat.v1.py_func(print_tensor, [tensor], [tensor.dtype])[0]
     with tf.control_dependencies([log_op]):
         res = tf.identity(tensor)
 
diff --git a/model/attention_agent.py b/model/attention_agent.py
index 411e958..b22888c 100644
--- a/model/attention_agent.py
+++ b/model/attention_agent.py
@@ -50,8 +50,8 @@ def __init__(self,
                         forget_bias=args['forget_bias'], 
                         rnn_layers=args['rnn_layers'],
                         _scope='Actor/')
-        self.decoder_input = tf.get_variable('decoder_input', [1,1,args['embedding_dim']],
-                       initializer=tf.contrib.layers.xavier_initializer())
+        self.decoder_input = tf.compat.v1.get_variable('decoder_input', [1,1,args['embedding_dim']],
+                       initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"))
 
         start_time  = time.time()
         if is_train:
@@ -64,8 +64,8 @@ def __init__(self,
         model_time = time.time()- start_time
         self.prt.print_out("It took {}s to build the agent.".format(str(model_time)))
 
-        self.saver = tf.train.Saver(
-            var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
+        self.saver = tf.compat.v1.train.Saver(
+            var_list=tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES))
             
         
     def build_model(self, decode_type = "greedy"):
@@ -73,7 +73,7 @@ def build_model(self, decode_type = "greedy"):
         # builds the model
         args = self.args
         env = self.env
-        batch_size = tf.shape(env.input_pnt)[0]
+        batch_size = tf.shape(input=env.input_pnt)[0]
 
         # input_pnt: [batch_size x max_time x 2]
         input_pnt = env.input_pnt
@@ -105,7 +105,7 @@ def build_model(self, decode_type = "greedy"):
         # decoder_state
         initial_state = tf.zeros([args['rnn_layers'], 2, batch_size*beam_width, args['hidden_dim']])
         l = tf.unstack(initial_state, axis=0)
-        decoder_state = tuple([tf.nn.rnn_cell.LSTMStateTuple(l[idx][0],l[idx][1])
+        decoder_state = tuple([tf.compat.v1.nn.rnn_cell.LSTMStateTuple(l[idx][0],l[idx][1])
                   for idx in range(args['rnn_layers'])])            
 
         # start from depot in VRP and from a trainable nodes in TSP
@@ -128,27 +128,27 @@ def build_model(self, decode_type = "greedy"):
             # idx: [batch_size*beam_width x 1]
             beam_parent = None
             if decode_type == 'greedy':
-                idx = tf.expand_dims(tf.argmax(prob, 1),1)
+                idx = tf.expand_dims(tf.argmax(input=prob, axis=1),1)
             elif decode_type == 'stochastic':
                 # select stochastic actions. idx has shape [batch_size x 1]
                 # tf.multinomial sometimes gives numerical errors, so we use our multinomial :(
                 def my_multinomial():
                     prob_idx = tf.stop_gradient(prob)
                     prob_idx_cum = tf.cumsum(prob_idx,1)
-                    rand_uni = tf.tile(tf.random_uniform([batch_size,1]),[1,env.n_nodes])
+                    rand_uni = tf.tile(tf.random.uniform([batch_size,1]),[1,env.n_nodes])
                     # sorted_ind : [[0,1,2,3..],[0,1,2,3..] , ]
                     sorted_ind = tf.cast(tf.tile(tf.expand_dims(tf.range(env.n_nodes),0),[batch_size,1]),tf.int64)
                     tmp = tf.multiply(tf.cast(tf.greater(prob_idx_cum,rand_uni),tf.int64), sorted_ind)+\
                         10000*tf.cast(tf.greater_equal(rand_uni,prob_idx_cum),tf.int64)
 
-                    idx = tf.expand_dims(tf.argmin(tmp,1),1)
+                    idx = tf.expand_dims(tf.argmin(input=tmp,axis=1),1)
                     return tmp, idx
 
                 tmp, idx = my_multinomial()
                 # check validity of tmp -> True or False -- True mean take a new sample
-                tmp_check = tf.cast(tf.reduce_sum(tf.cast(tf.greater(tf.reduce_sum(tmp,1),(10000*env.n_nodes)-1),
+                tmp_check = tf.cast(tf.reduce_sum(input_tensor=tf.cast(tf.greater(tf.reduce_sum(input_tensor=tmp,axis=1),(10000*env.n_nodes)-1),
                                                           tf.int32)),tf.bool)
-                tmp , idx = tf.cond(tmp_check,my_multinomial,lambda:(tmp,idx))
+                tmp , idx = tf.cond(pred=tmp_check,true_fn=my_multinomial,false_fn=lambda:(tmp,idx))
 
             elif decode_type == 'beam_search':
                 if i==0:
@@ -160,10 +160,10 @@ def my_multinomial():
                     log_beam_probs = []
                     # in the initial decoder step, we want to choose beam_width different branches
                     # log_beam_prob: [batch_size, sourceL]
-                    log_beam_prob = tf.log(tf.split(prob,num_or_size_splits=beam_width, axis=0)[0])
+                    log_beam_prob = tf.math.log(tf.split(prob,num_or_size_splits=beam_width, axis=0)[0])
 
                 elif i > 0:
-                    log_beam_prob = tf.log(prob) + log_beam_probs[-1]
+                    log_beam_prob = tf.math.log(prob) + log_beam_probs[-1]
                     # log_beam_prob:[batch_size, beam_width*sourceL]
                     log_beam_prob = tf.concat(tf.split(log_beam_prob, num_or_size_splits=beam_width, axis=0),1)
 
@@ -171,11 +171,11 @@ def my_multinomial():
                 topk_logprob_val, topk_logprob_ind = tf.nn.top_k(log_beam_prob, beam_width)
 
                 # topk_logprob_val , topk_logprob_ind: [batch_size*beam_width x 1]
-                topk_logprob_val = tf.transpose(tf.reshape(
-                    tf.transpose(topk_logprob_val), [1,-1]))
+                topk_logprob_val = tf.transpose(a=tf.reshape(
+                    tf.transpose(a=topk_logprob_val), [1,-1]))
 
-                topk_logprob_ind = tf.transpose(tf.reshape(
-                    tf.transpose(topk_logprob_ind), [1,-1]))
+                topk_logprob_ind = tf.transpose(a=tf.reshape(
+                    tf.transpose(a=topk_logprob_ind), [1,-1]))
 
                 #idx,beam_parent: [batch_size*beam_width x 1]                               
                 idx = tf.cast(topk_logprob_ind % env.n_nodes, tf.int64) # Which city in route.
@@ -195,7 +195,7 @@ def my_multinomial():
             decoder_input = tf.expand_dims(tf.gather_nd(
                 tf.tile(encoder_emb_inp,[beam_width,1,1]), batched_idx),1)
 
-            logprob = tf.log(tf.gather_nd(prob, batched_idx))
+            logprob = tf.math.log(tf.gather_nd(prob, batched_idx))
             probs.append(prob)
             idxs.append(idx)
             logprobs.append(logprob)           
@@ -221,17 +221,17 @@ def my_multinomial():
         ### critic
         v = tf.constant(0)
         if decode_type=='stochastic':
-            with tf.variable_scope("Critic"):
-                with tf.variable_scope("Encoder"):
+            with tf.compat.v1.variable_scope("Critic"):
+                with tf.compat.v1.variable_scope("Encoder"):
                     # init states
                     initial_state = tf.zeros([args['rnn_layers'], 2, batch_size, args['hidden_dim']])
                     l = tf.unstack(initial_state, axis=0)
-                    rnn_tuple_state = tuple([tf.nn.rnn_cell.LSTMStateTuple(l[idx][0],l[idx][1])
+                    rnn_tuple_state = tuple([tf.compat.v1.nn.rnn_cell.LSTMStateTuple(l[idx][0],l[idx][1])
                               for idx in range(args['rnn_layers'])])
 
                     hy = rnn_tuple_state[0][1]
 
-                with tf.variable_scope("Process"):
+                with tf.compat.v1.variable_scope("Process"):
                     for i in range(args['n_process_blocks']):
 
                         process = self.clAttentionCritic(args['hidden_dim'],_name="P"+str(i))
@@ -242,8 +242,8 @@ def my_multinomial():
                         #[batch_size x h_dim ]
                         hy = tf.squeeze(tf.matmul(tf.expand_dims(prob,1), e ) ,1)
 
-                with tf.variable_scope("Linear"):
-                    v = tf.squeeze(tf.layers.dense(tf.layers.dense(hy,args['hidden_dim']\
+                with tf.compat.v1.variable_scope("Linear"):
+                    v = tf.squeeze(tf.compat.v1.layers.dense(tf.compat.v1.layers.dense(hy,args['hidden_dim']\
                                                                ,tf.nn.relu,name='L1'),1,name='L2'),1)
 
 
@@ -261,18 +261,18 @@ def build_train_step(self):
         R = tf.stop_gradient(R)
 
         # losses
-        actor_loss = tf.reduce_mean(tf.multiply((R-v_nograd),tf.add_n(logprobs)),0)
-        critic_loss = tf.losses.mean_squared_error(R,v)
+        actor_loss = tf.reduce_mean(input_tensor=tf.multiply((R-v_nograd),tf.add_n(logprobs)),axis=0)
+        critic_loss = tf.compat.v1.losses.mean_squared_error(R,v)
 
         # optimizers
-        actor_optim = tf.train.AdamOptimizer(args['actor_net_lr'])
-        critic_optim = tf.train.AdamOptimizer(args['critic_net_lr'])
+        actor_optim = tf.compat.v1.train.AdamOptimizer(args['actor_net_lr'])
+        critic_optim = tf.compat.v1.train.AdamOptimizer(args['critic_net_lr'])
 
         # compute gradients
         actor_gra_and_var = actor_optim.compute_gradients(actor_loss,\
-                                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor'))
+                                tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='Actor'))
         critic_gra_and_var = critic_optim.compute_gradients(critic_loss,\
-                                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic'))
+                                tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='Critic'))
 
         # clip gradients
         clip_actor_gra_and_var = [(tf.clip_by_norm(grad, args['max_grad_norm']), var) \
@@ -301,7 +301,7 @@ def build_train_step(self):
 
     def Initialize(self,sess):
         self.sess = sess
-        self.sess.run(tf.global_variables_initializer())
+        self.sess.run(tf.compat.v1.global_variables_initializer())
         self.load_model()
 
     def load_model(self):
diff --git a/shared/attention.py b/shared/attention.py
index 01f5408..18081c9 100644
--- a/shared/attention.py
+++ b/shared/attention.py
@@ -6,13 +6,13 @@ def __init__(self, dim, use_tanh=False, C=10,_name='Attention',_scope=''):
         self.use_tanh = use_tanh
         self._scope = _scope
 
-        with tf.variable_scope(_scope+_name):
+        with tf.compat.v1.variable_scope(_scope+_name):
             # self.v: is a variable with shape [1 x dim]
-            self.v = tf.get_variable('v',[1,dim],
-                       initializer=tf.contrib.layers.xavier_initializer())
+            self.v = tf.compat.v1.get_variable('v',[1,dim],
+                       initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"))
             self.v = tf.expand_dims(self.v,2)
-        self.project_query = tf.layers.Dense(dim,_scope=_scope+_name +'/dense')
-        self.project_ref = tf.layers.Conv1D(dim,1,_scope=_scope+_name +'/conv1d')
+        self.project_query = tf.compat.v1.layers.Dense(dim,_scope=_scope+_name +'/dense')
+        self.project_ref = tf.compat.v1.layers.Conv1D(dim,1,_scope=_scope+_name +'/conv1d')
         self.C = C  # tanh exploration parameter
         self.tanh = tf.nn.tanh
 
@@ -32,10 +32,10 @@ def __call__(self, query, ref, *args, **kwargs):
         # expanded_q,e: [batch_size x max_time x dim]
         e = self.project_ref(ref)
         q = self.project_query(query) #[batch_size x dim]
-        expanded_q = tf.tile(tf.expand_dims(q,1),[1,tf.shape(e)[1],1])
+        expanded_q = tf.tile(tf.expand_dims(q,1),[1,tf.shape(input=e)[1],1])
 
         # v_view:[batch_size x dim x 1]
-        v_view = tf.tile( self.v, [tf.shape(e)[0],1,1]) 
+        v_view = tf.tile( self.v, [tf.shape(input=e)[0],1,1]) 
         
         # u : [batch_size x max_time x dim] * [batch_size x dim x 1] = 
         #       [batch_size x max_time]
@@ -49,11 +49,11 @@ def __call__(self, query, ref, *args, **kwargs):
         return e, logits
 
 if __name__ == "__main__":
-    sess = tf.InteractiveSession()
-    tf.set_random_seed(100)
-    q = tf.random_uniform([2,128])
-    ref = tf.random_uniform([2,10,128])
+    sess = tf.compat.v1.InteractiveSession()
+    tf.compat.v1.set_random_seed(100)
+    q = tf.random.uniform([2,128])
+    ref = tf.random.uniform([2,10,128])
     attention = Attention(128,use_tanh=True, C=10)
     e, logits = attention(q,ref)
-    sess.run(tf.global_variables_initializer())
+    sess.run(tf.compat.v1.global_variables_initializer())
     print(sess.run([logits, tf.nn.softmax(logits)]))
diff --git a/shared/decode_step.py b/shared/decode_step.py
index 6c3cc33..6538049 100644
--- a/shared/decode_step.py
+++ b/shared/decode_step.py
@@ -172,12 +172,12 @@ def __init__(self,
 #         self.dropout = tf.placeholder(tf.float32,name='decoder_rnn_dropout')
 
         # build a multilayer LSTM cell
-        single_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_dim, 
+        single_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(hidden_dim, 
             forget_bias=forget_bias)
-        self.dropout = tf.placeholder(tf.float32,name='decoder_rnn_dropout') 
+        self.dropout = tf.compat.v1.placeholder(tf.float32,name='decoder_rnn_dropout') 
         single_cell = tf.contrib.rnn.DropoutWrapper(
                 cell=single_cell, input_keep_prob=(1.0 - self.dropout))
-        self.cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * rnn_layers)
+        self.cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([single_cell] * rnn_layers)
 
     def get_logit_op(self,
                     decoder_inp,
@@ -208,7 +208,7 @@ def get_logit_op(self,
         """
 
 #         decoder_inp = tf.reshape(decoder_inp,[-1,1,self.hidden_dim])
-        _ , decoder_state = tf.nn.dynamic_rnn(self.cell,
+        _ , decoder_state = tf.compat.v1.nn.dynamic_rnn(self.cell,
                                               decoder_inp,
                                               initial_state=decoder_state,
                                               scope=self._scope+'Decoder/LSTM/rnn')
diff --git a/shared/embeddings.py b/shared/embeddings.py
index 085301b..6181480 100644
--- a/shared/embeddings.py
+++ b/shared/embeddings.py
@@ -25,7 +25,7 @@ def __init__(self,embedding_dim,_scope=''):
         '''
 
         super(LinearEmbedding,self).__init__('linear',embedding_dim)
-        self.project_emb = tf.layers.Conv1D(embedding_dim,1,
+        self.project_emb = tf.compat.v1.layers.Conv1D(embedding_dim,1,
             _scope=_scope+'Embedding/conv1d')
 
     def __call__(self,input_pnt):
@@ -37,9 +37,9 @@ def __call__(self,input_pnt):
 
 
 if __name__ == "__main__":
-    sess = tf.InteractiveSession()
-    input_pnt = tf.random_uniform([2,10,2])
+    sess = tf.compat.v1.InteractiveSession()
+    input_pnt = tf.random.uniform([2,10,2])
     Embedding = LinearEmbedding(128)
     emb_inp_pnt = Embedding(input_pnt)
-    sess.run(tf.global_variables_initializer())
-    print(sess.run([emb_inp_pnt,tf.shape(emb_inp_pnt)]))
+    sess.run(tf.compat.v1.global_variables_initializer())
+    print(sess.run([emb_inp_pnt,tf.shape(input=emb_inp_pnt)]))
diff --git a/shared/misc_utils.py b/shared/misc_utils.py
index e8a566f..9b01129 100644
--- a/shared/misc_utils.py
+++ b/shared/misc_utils.py
@@ -61,7 +61,7 @@ def get_time():
 def get_config_proto(log_device_placement=False, allow_soft_placement=True):
         # GPU options:
         # https://www.tensorflow.org/versions/r0.10/how_tos/using_gpu/index.html
-        config_proto = tf.ConfigProto(
+        config_proto = tf.compat.v1.ConfigProto(
                         log_device_placement=log_device_placement,
                         allow_soft_placement=allow_soft_placement)
         config_proto.gpu_options.allow_growth = True
@@ -71,7 +71,7 @@ def debug_tensor(s, msg=None, summarize=10):
         """Print the shape and value of a tensor at test time. Return a new tensor."""
         if not msg:
                 msg = s.name
-        return tf.Print(s, [tf.shape(s), s], msg + " ", summarize=summarize)
+        return tf.compat.v1.Print(s, [tf.shape(input=s), s], msg + " ", summarize=summarize)
 
 def has_nan(datum, tensor):
         if hasattr(tensor, 'dtype'):
@@ -86,22 +86,22 @@ def has_nan(datum, tensor):
 
 def openAI_entropy(logits):
         # Entropy proposed by OpenAI in their A2C baseline
-        a0 = logits - tf.reduce_max(logits, 2, keepdims=True)
+        a0 = logits - tf.reduce_max(input_tensor=logits, axis=2, keepdims=True)
         ea0 = tf.exp(a0)
-        z0 = tf.reduce_sum(ea0, 2, keepdims=True)
+        z0 = tf.reduce_sum(input_tensor=ea0, axis=2, keepdims=True)
         p0 = ea0 / z0
-        return tf.reduce_mean(tf.reduce_sum(p0 * (tf.log(z0) - a0), 2))
+        return tf.reduce_mean(input_tensor=tf.reduce_sum(input_tensor=p0 * (tf.math.log(z0) - a0), axis=2))
 
 
 def softmax_entropy(p0):
         # Normal information theory entropy by Shannon
-        return - tf.reduce_sum(p0 * tf.log(p0 + 1e-6), axis=1)
+        return - tf.reduce_sum(input_tensor=p0 * tf.math.log(p0 + 1e-6), axis=1)
 
 def Dist_mat(A):
         # A is of shape [batch_size x nnodes x 2].
         # return: a distance matrix with shape [batch_size x nnodes x nnodes]
-        nnodes = tf.shape(A)[1]
+        nnodes = tf.shape(input=A)[1]
         A1 = tf.tile(tf.expand_dims(A,1),[1,nnodes,1,1])
         A2 = tf.tile(tf.expand_dims(A,2),[1,1,nnodes,1])
-        dist = tf.norm(A1-A2,axis=3)
+        dist = tf.norm(tensor=A1-A2,axis=3)
         return dist
\ No newline at end of file