This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-651] MXNet Model Backwards Compatibility Checker #11626

Merged Jul 31, 2018 (61 commits)
Commits
4ee8b21
Added MNIST-MLP-Module-API models to check model save and load_checkp…
piyushghai Jul 6, 2018
118850f
Added LENET with Conv2D operator training file
piyushghai Jul 6, 2018
27863fd
Added LENET with Conv2d operator inference file
piyushghai Jul 6, 2018
b3e9774
Added LanguageModelling with RNN training file
piyushghai Jul 7, 2018
c141701
Added LanguageModelling with RNN inference file
piyushghai Jul 7, 2018
35cbefb
Added hybridized LENET Gluon Model training file
piyushghai Jul 9, 2018
418f805
Added hybridized LENET gluon model inference file
piyushghai Jul 9, 2018
600efaf
Added license headers
piyushghai Jul 9, 2018
d73b9e2
Refactored the model and inference files and extracted out duplicate …
piyushghai Jul 9, 2018
3eeba08
Added runtime function for executing the MBCC files
piyushghai Jul 10, 2018
9c0157c
Added JenkinsFile for MBCC to be run as a nightly job
piyushghai Jul 10, 2018
3d43bcd
Added boto3 install for s3 uploads
piyushghai Jul 10, 2018
4b70e4a
Added README for MBCC
piyushghai Jul 10, 2018
08ad342
Added license header
piyushghai Jul 10, 2018
5d1c3fc
Added more common functions from lm_rnn_gluon_train and inference fil…
piyushghai Jul 10, 2018
cfe8dfc
Added scripts for training models on older versions of MXNet
piyushghai Jul 11, 2018
7c41488
Added check for preventing inference script from crashing in case no …
piyushghai Jul 11, 2018
50be5d8
Fixed indentation issue
piyushghai Jul 11, 2018
c3c9129
Replaced Penn Tree Bank Dataset with Sherlock Holmes Dataset
piyushghai Jul 11, 2018
3485352
Fixed indentation issue
piyushghai Jul 11, 2018
af9b86d
Removed training in models and added smaller models. Now we are simpl…
piyushghai Jul 12, 2018
79cfa46
Updated README
piyushghai Jul 12, 2018
4df779b
Fixed indentation error
piyushghai Jul 12, 2018
04465b0
Fixed indentation error
piyushghai Jul 12, 2018
2d5cf09
Removed code duplication in the training file
piyushghai Jul 13, 2018
7bfdf87
Added comments for runtime_functions script for training files
piyushghai Jul 16, 2018
c80ee31
Merged S3 Buckets for storing data and models into one
piyushghai Jul 16, 2018
e764d5a
Automated the process to fetch MXNet versions from git tags
piyushghai Jul 16, 2018
05ded05
Added defensive checks for the case where the data might not be found
piyushghai Jul 16, 2018
60c7be0
Fixed issue where we were performing inference on state model files
piyushghai Jul 16, 2018
9d4d099
Replaced print statements with logging ones
piyushghai Jul 18, 2018
d08ba5a
Merge branch 'master' into mbcc
piyushghai Jul 25, 2018
cebfb26
Removed boto install statements and move them into ubuntu_python docker
piyushghai Jul 25, 2018
f7a36eb
Separated training and uploading of models into separate files so tha…
piyushghai Jul 25, 2018
1f63941
Updated comments and README
piyushghai Jul 26, 2018
fbaf3e0
Fixed pylint warnings
piyushghai Jul 26, 2018
edd6816
Removed the venv for training process
piyushghai Jul 26, 2018
87103d4
Fixed indentation in the MBCC Jenkins file and also separated out tra…
piyushghai Jul 26, 2018
eb24e8e
Fixed indentation
piyushghai Jul 26, 2018
3525656
Fixed erroneous single quote
piyushghai Jul 26, 2018
25e7ec7
Added --user flag to check for Jenkins error
piyushghai Jul 26, 2018
00ee6e7
Removed unused methods
piyushghai Jul 26, 2018
a3a72b8
Added force flag in the pip command to install mxnet
piyushghai Jul 26, 2018
86e8882
Removed the force-re-install flag
piyushghai Jul 26, 2018
ddb672a
Changed exit 1 to exit 0
piyushghai Jul 26, 2018
9e77064
Added quotes around the shell command
piyushghai Jul 26, 2018
69843fb
added packlibs and unpack libs for MXNet builds
piyushghai Jul 26, 2018
fae44fe
Changed PythonPath from relative to absolute
piyushghai Jul 27, 2018
c099979
Created dedicated bucket with correct permission
marcoabreu Jul 30, 2018
ffcc637
Fix for python path in training
piyushghai Jul 30, 2018
7f7f6e3
Merge branch 'mbcc' of https://github.com/piyushghai/incubator-mxnet …
piyushghai Jul 30, 2018
33096c0
Changed bucket name to CI bucket
piyushghai Jul 30, 2018
8a085b5
Added set -ex to the upload shell script
piyushghai Jul 30, 2018
5207ab1
Now raising an exception if no models are found in the S3 bucket
piyushghai Jul 30, 2018
5e30f7a
Added regex to train models script
piyushghai Jul 30, 2018
e079d3c
Added check for performing inference only on models trained on same m…
piyushghai Jul 30, 2018
ceac705
Added set -ex flags to shell scripts
piyushghai Jul 30, 2018
16d320a
Added multi-version regex checks in training
piyushghai Jul 30, 2018
19495d6
Fixed typo in regex
piyushghai Jul 30, 2018
d8fa75d
Now we will train models for all the minor versions for a given major…
piyushghai Jul 30, 2018
ca01aa2
Added check for validating current_version
piyushghai Jul 30, 2018
13 changes: 13 additions & 0 deletions ci/docker/runtime_functions.sh
@@ -862,6 +862,19 @@ nightly_model_backwards_compat_test() {
./tests/nightly/model_backwards_compatibility_check/model_backward_compat_checker.sh
}

# Backfills the S3 bucket with models trained on earlier versions of MXNet
nightly_model_backwards_compat_train() {
set -ex
export PYTHONPATH=./python/
VENV=mbcc_py2_venv
virtualenv -p `which python2` $VENV
Contributor:

If you run inside a virtualenv, you won't have access to any globally installed packages. I'd recommend not using virtualenv here, because you're running inside Docker anyway. This is also the reason for the CI error stating that it could not find boto3.

Contributor Author:

Ah I see. Fixed in edd6816

source $VENV/bin/activate
pip install boto3
./tests/nightly/model_backwards_compatibility_check/train_mxnet_legacy_models.sh
# Deactivate the virtual env once we are done with it
deactivate
}

# Deploy

deploy_docs() {
3 changes: 2 additions & 1 deletion tests/nightly/model_backwards_compatibility_check/README.md
@@ -14,6 +14,7 @@ This is configuration file for jenkins job.
- These APIs are covered over models with architectures such as : MLP, RNNs, LeNet covering the four scenarios described above.
- More operators/models will be added in the future to extend the operator coverage.
- The model train files suffixed by `_train.py` and the trained models are hosted in AWS S3.
-- The trained models for now are backfilled into S3 starting from every MXNet release version v1.0.0
+- The trained models for now are backfilled into S3 starting from every MXNet release version v1.1.0.
- The script for training the models on older versions of MXNet is : `train_mxnet_legacy_models.sh`.
- The inference files are suffixed by `_inference.py`.

174 changes: 173 additions & 1 deletion tests/nightly/model_backwards_compatibility_check/common.py
@@ -29,7 +29,10 @@
from mxnet.gluon.data.vision import transforms, datasets
from mxnet import autograd as ag
import mxnet.ndarray as F
from mxnet.gluon import nn
from mxnet.gluon import nn, rnn
import re
import time
import sys

# Set fixed random seeds.
mx.random.seed(7)
@@ -111,6 +114,15 @@ def save_inference_results(inference_results_file, inference_results):
with open(inference_results_file, 'w') as file:
json.dump(inference_results, file)


def compare_versions(version1, version2):
'''
Compare two dotted version strings; note this relies on the builtin cmp(), which exists only on Python 2.
https://stackoverflow.com/questions/1714027/version-number-comparison-in-python
'''
def normalize(v):
return [int(x) for x in re.sub(r'(\.0+)*$','', v).split(".")]
return cmp(normalize(version1), normalize(version2))
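The helper above depends on the builtin `cmp()`, which was removed in Python 3 (the PR runs it under a Python 2 venv). A minimal Python 3 port of the same idea — the name `compare_versions_py3` is ours, not from the PR — could look like:

```python
import re

def normalize(v):
    # Strip trailing ".0" components, then split into integer fields,
    # so that "1.2" and "1.2.0" normalize to the same list.
    return [int(x) for x in re.sub(r'(\.0+)*$', '', v).split('.')]

def compare_versions_py3(version1, version2):
    # (a > b) - (a < b) reproduces cmp()'s -1 / 0 / 1 contract.
    a, b = normalize(version1), normalize(version2)
    return (a > b) - (a < b)
```

Because the fields are compared as integers, `1.10.0` correctly sorts after `1.2.0`, which a plain string comparison would get wrong.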

def get_val_test_iter():
data = prepare_mnist_data(mx.test_utils.get_mnist())
val = data['val']
@@ -165,3 +177,163 @@ def forward(self, x):
x = F.tanh(self.fc1(x))
x = F.tanh(self.fc2(x))
return x

class Dictionary(object):
def __init__(self):
self.word2idx = {}
self.idx2word = []

def add_word(self, word):
if word not in self.word2idx:
self.idx2word.append(word)
self.word2idx[word] = len(self.idx2word) - 1
return self.word2idx[word]

def __len__(self):
return len(self.idx2word)

class Corpus(object):
def __init__(self, path):
self.dictionary = Dictionary()
self.download_data_from_s3()
self.train = self.tokenize(path + 'train.txt')
self.valid = self.tokenize(path + 'valid.txt')
self.test = self.tokenize(path + 'test.txt')

def download_data_from_s3(self):
print('Downloading files from bucket : ptb-small-dataset')
bucket = s3.Bucket('ptb-small-dataset')
files = ['test.txt', 'train.txt', 'valid.txt']
for file in files:
if os.path.exists(args_data + file):
print('File %s' % (args_data + file), 'already exists. Skipping download')
continue
file_path = args_data + file
bucket.download_file(file_path, args_data + file)

def tokenize(self, path):
"""Tokenizes a text file."""
assert os.path.exists(path)
# Add words to the dictionary
with open(path, 'r') as f:
tokens = 0
for line in f:
words = line.split() + ['<eos>']
tokens += len(words)
for word in words:
self.dictionary.add_word(word)

# Tokenize file content
with open(path, 'r') as f:
ids = np.zeros((tokens,), dtype='int32')
token = 0
for line in f:
words = line.split() + ['<eos>']
for word in words:
ids[token] = self.dictionary.word2idx[word]
token += 1

return mx.nd.array(ids, dtype='int32')
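`Corpus.tokenize` makes two passes over the file: one to grow the vocabulary, one to map every word to its index. A self-contained sketch of that two-pass scheme, using illustrative names (`MiniDictionary`, `tokenize_lines` — not from the PR) and plain Python lists in place of MXNet arrays:

```python
class MiniDictionary:
    """Minimal word <-> index mapping, mirroring the Dictionary class above."""
    def __init__(self):
        self.word2idx = {}
        self.idx2word = []

    def add_word(self, word):
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        return self.word2idx[word]

def tokenize_lines(lines, dictionary):
    # Pass 1: build the vocabulary, appending '<eos>' to every line.
    for line in lines:
        for word in line.split() + ['<eos>']:
            dictionary.add_word(word)
    # Pass 2: map each word to its integer id.
    ids = []
    for line in lines:
        for word in line.split() + ['<eos>']:
            ids.append(dictionary.word2idx[word])
    return ids

d = MiniDictionary()
ids = tokenize_lines(['the cat sat', 'the cat ran'], d)
```

Indices are assigned in first-seen order, so repeated words ("the", "cat") reuse their ids and the `<eos>` marker terminates each line's id sequence.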



#### Common utilities for lm_rnn_gluon_train & inference files
args_data = 'ptb.'
args_model = 'rnn_relu'
args_emsize = 100
args_nhid = 100
args_nlayers = 2
args_lr = 1.0
args_clip = 0.2
args_epochs = 2
args_batch_size = 32
args_bptt = 5
args_dropout = 0.2
args_tied = True
args_cuda = 'store_true'
args_log_interval = 500

class RNNModel(gluon.Block):
"""A model with an encoder, recurrent layer, and a decoder."""

def __init__(self, mode, vocab_size, num_embed, num_hidden,
num_layers, dropout=0.5, tie_weights=False, **kwargs):
super(RNNModel, self).__init__(**kwargs)
with self.name_scope():
self.drop = nn.Dropout(dropout)
self.encoder = nn.Embedding(vocab_size, num_embed,
weight_initializer = mx.init.Uniform(0.1))
if mode == 'rnn_relu':
self.rnn = rnn.RNN(num_hidden, num_layers, activation='relu', dropout=dropout,
input_size=num_embed)
elif mode == 'rnn_tanh':
self.rnn = rnn.RNN(num_hidden, num_layers, dropout=dropout,
input_size=num_embed)
elif mode == 'lstm':
self.rnn = rnn.LSTM(num_hidden, num_layers, dropout=dropout,
input_size=num_embed)
elif mode == 'gru':
self.rnn = rnn.GRU(num_hidden, num_layers, dropout=dropout,
input_size=num_embed)
else:
raise ValueError("Invalid mode %s. Options are rnn_relu, "
"rnn_tanh, lstm, and gru"%mode)
if tie_weights:
self.decoder = nn.Dense(vocab_size, in_units = num_hidden,
params = self.encoder.params)
else:
self.decoder = nn.Dense(vocab_size, in_units = num_hidden)
self.num_hidden = num_hidden

def forward(self, inputs, hidden):
emb = self.drop(self.encoder(inputs))
output, hidden = self.rnn(emb, hidden)
output = self.drop(output)
decoded = self.decoder(output.reshape((-1, self.num_hidden)))
return decoded, hidden

def begin_state(self, *args, **kwargs):
return self.rnn.begin_state(*args, **kwargs)

def batchify(data, batch_size):
"""Reshape data into (num_example, batch_size)"""
nbatch = data.shape[0] // batch_size
data = data[:nbatch * batch_size]
data = data.reshape((batch_size, nbatch)).T
return data

def get_batch(source, i):
seq_len = min(args_bptt, source.shape[0] - 1 - i)
data = source[i : i + seq_len]
target = source[i + 1 : i + 1 + seq_len]
return data, target.reshape((-1,))
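`batchify` and `get_batch` implement standard truncated-BPTT batching: the token stream is cut into `batch_size` parallel columns, and each step reads a window of up to `args_bptt` rows as input plus the same window shifted by one token as the target. A NumPy sketch of the same reshaping (the `_np` names are ours, added so this stands alone without MXNet):

```python
import numpy as np

def batchify_np(data, batch_size):
    # Trim the tail so the stream divides evenly, then reshape to
    # (num_steps, batch_size): each column is one independent stream.
    nbatch = data.shape[0] // batch_size
    data = data[:nbatch * batch_size]
    return data.reshape((batch_size, nbatch)).T

def get_batch_np(source, i, bptt):
    # Input is source[i : i+seq_len]; the target is the same window
    # shifted forward by one token and flattened.
    seq_len = min(bptt, source.shape[0] - 1 - i)
    return source[i:i + seq_len], source[i + 1:i + 1 + seq_len].reshape(-1)

stream = np.arange(10)
batched = batchify_np(stream, 2)            # columns: 0..4 and 5..9
data, target = get_batch_np(batched, 0, 3)
```

Note that `target` interleaves the two columns after flattening, which is why the loss in `eval` is computed against a flattened decoder output of shape `(-1, vocab_size)`.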

def detach(hidden):
if isinstance(hidden, (tuple, list)):
hidden = [i.detach() for i in hidden]
else:
hidden = hidden.detach()
return hidden

def eval(data_source, model):
total_L = 0.0
ntotal = 0
loss = gluon.loss.SoftmaxCrossEntropyLoss()
hidden = model.begin_state(func = mx.nd.zeros, batch_size = args_batch_size, ctx=mx.cpu(0))
for i in range(0, data_source.shape[0] - 1, args_bptt):
data, target = get_batch(data_source, i)
output, hidden = model(data, hidden)
L = loss(output, target)
total_L += mx.nd.sum(L).asscalar()
ntotal += L.size
return total_L / ntotal

def clean_ptb_data():
files = ['test.txt', 'train.txt', 'valid.txt']
for file in files:
if os.path.isfile(args_data + file):
os.remove(args_data + file)

# These cleanup calls remove any files left corrupted by an interrupted download
clean_mnist_data()
clean_ptb_data()