awslabs · sandeep-krishnamurthy · May 13, 2018 · May 7, 2018
diff --git a/benchmark/README.md b/benchmark/README.md
@@ -19,11 +19,11 @@ We provide benchmark scripts to run on CIFAR-10, ImageNet and Synthetic Dataset(
 [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset has 60000 32x32 color images in 10 classes.
 The [training scripts](https://github.com/awslabs/keras-apache-mxnet/blob/master/benchmark/image-classification/benchmark_resnet.py)
  will automatically download the dataset, you need to provide dataset name, resnet version 
-(1 or 2), number of layers (20, 56, or 110), number of GPUs to use. 
+(1 or 2), number of layers (20, 56, or 110), number of GPUs to use, and number of epochs to use (optional, default: 200).
 
 Example Usage:
 
-`python benchmark_resnet.py --dataset cifar10 --version 1 --layers 56 --gpus 4`
+`python benchmark_resnet.py --dataset cifar10 --version 1 --layers 56 --gpus 4 --epoch 20`
 
 
 ### ImageNet Dataset
@@ -42,19 +42,19 @@ Compare to CIFAR-10, you need to provide additional params: training mode and pa
 
 Example usage:
 
-`python benchmark_resnet.py --dataset imagenet --mxnet_backend_training_speed.pngversion 1 -layers 56 --gpus 4 --train_mode train_on_batch --data_path home/ubuntu/imagenet/train/`
+`python benchmark_resnet.py --dataset imagenet --version 1 --layers 56 --gpus 4 --epoch 20 --train_mode train_on_batch --data_path home/ubuntu/imagenet/train/`
 
 ### Synthetic Dataset
 We used benchmark scripts from 
 [TensorFlow Benchmark](https://github.com/tensorflow/benchmarks/tree/keras-benchmarks/scripts/keras_benchmarks) 
 official repo, and modified slightly for our use case.
 
 Directly run the shell script to launch the benchmark, provide one of the configurations in config.json and whether 
-you want to benchmark inference speed (True or False). 
+you want to benchmark inference speed (True or False), and the number of epochs to use (optional, default: 20).
 
 Example Usage:
 
-`sh run_<backend-type>_backend.sh gpu_config False`
+`sh run_<backend-type>_backend.sh gpu_config False 20`
 
 ### CNN Benchmark Results
 Here we list the result of MXNet backend training speed on CIFAR-10, ImageNet and Synthetic Data using 
@@ -84,11 +84,11 @@ Note: X-axis is number of GPUs used, Y-axis is training speed(images/second)
 
 We provide benchmark scripts to run on Synthetic(randomly generated), Nietzsche, and WikiText-2 character level Dataset.
 
-Directly run the shell script to launch the benchmark, provide one of the configurations in config.json and whether you want to benchmark inference speed (True or False). 
+Directly run the shell script to launch the benchmark, provide one of the configurations in config.json, whether you want to benchmark inference speed (True or False), and the number of epochs to use (optional, default: 20).
 
 Example Usage:
 
-`sh run_<backend-type>_backend.sh gpu_config False`
+`sh run_<backend-type>_backend.sh gpu_config False 20`
 
 ### Synthetic Dataset
 
@@ -158,4 +158,4 @@ Synthetic Data scripts modified from
 [TensorFlow Benchmarks](https://github.com/tensorflow/benchmarks/tree/keras-benchmarks)
 
 ## Reference
-[1] [TensorFlow Benchmarks](https://github.com/tensorflow/benchmarks/tree/keras-benchmarks)
+[1] [TensorFlow Benchmarks](https://github.com/tensorflow/benchmarks/tree/keras-benchmarks)
diff --git a/benchmark/scripts/benchmark_resnet.py b/benchmark/scripts/benchmark_resnet.py
@@ -47,6 +47,8 @@
                     help='Required for imagenet: train_on_batch or fit_generator')
 parser.add_argument('--data_path',
                     help='Required for imagenet: path_to_imagenet_data')
+parser.add_argument('--epoch', default=200, type=int,
+                    help='Number of epoch')
 
 args = parser.parse_args()
 
@@ -80,7 +82,7 @@
 
 # Training parameters
 batch_size = 32 * num_gpus if num_gpus > 0 else 32
-epochs = 200
+epochs = int(args.epoch)
 num_classes = 1000 if args.dataset == "imagenet" else 10
 data_format = K._image_data_format
 print('using image format:', data_format)

diff --git a/benchmark/scripts/logging_metrics.py b/benchmark/scripts/logging_metrics.py
@@ -0,0 +1,81 @@
+import keras
+
+
+class LoggingMetrics:
+    """Collect per-epoch training metrics and write them to a log.
+
+    This is a plain helper (not a Keras callback) that reads what two
+    callbacks recorded during training.
+
+    # Arguments
+        history_callback: instance of `keras.callbacks.History`.
+            Training parameters
+            (eg. batch size, number of epochs, loss, acc).
+        time_callback: instance of `keras.callbacks.Callback`.
+            Training parameters
+            (eg. time, time-step, speed).
+
+    # Raises
+        TypeError: In case of invalid object instance.
+    """
+
+    def __init__(self, history_callback, time_callback):
+        self.num_iteration = None
+        self.metric_list = []
+        self.pattern_list = []
+        self.retrieve_metrics(history_callback, time_callback)
+
+    def retrieve_metrics(self, history_callback, time_callback):
+        """Collect the per-epoch metric sequences and their format patterns.
+
+        Each entry appended to `metric_list` is indexed by epoch position; the
+        entry at the same position in `pattern_list` is the `%`-format
+        fragment used to print one of its values.
+        """
+        if not isinstance(history_callback, keras.callbacks.History):
+            raise TypeError('`history_callback` should be an instance of '
+                            '`keras.callbacks.History`')
+        if not isinstance(time_callback, keras.callbacks.Callback):
+            raise TypeError('`time_callback` should be an instance of '
+                            '`keras.callbacks.Callback`')
+
+        if hasattr(history_callback, 'epoch'):
+            self.metric_list.append(history_callback.epoch)
+            self.pattern_list.append('[Epoch %d]\t')
+
+        if hasattr(time_callback, 'times'):
+            self.metric_list.append(time_callback.get_time())
+            self.metric_list.append(time_callback.get_time_step())
+            self.metric_list.append(time_callback.get_speed())
+            self.pattern_list.append('time: %s\t')
+            self.pattern_list.append('time_step: %s\t')
+            self.pattern_list.append('speed: %s\t')
+
+        history = history_callback.history
+        for key, pattern in (('loss', 'train_loss: %.4f\t'),
+                             ('acc', 'train_acc: %.4f\t'),
+                             ('val_loss', 'val_loss: %.4f\t'),
+                             ('val_acc', 'val_acc: %.4f\t')):
+            if key in history:
+                self.metric_list.append(history[key])
+                self.pattern_list.append(pattern)
+
+        # Iterate only over epochs that every metric has a value for.
+        # `params['epochs']` is the configured epoch count; a run that
+        # records fewer entries than that would otherwise make
+        # `get_metrics_index` raise IndexError.
+        lengths = [len(metric) for metric in self.metric_list]
+        self.num_iteration = min(lengths) if lengths else 0
+
+    def get_metrics_index(self, idx):
+        """Return the value of every collected metric at position `idx`."""
+        return tuple(metric[idx] for metric in self.metric_list)
+
+    def save_metrics_to_log(self, logging):
+        """Write one `logging.info` line per recorded epoch.
+
+        `logging` only needs to provide an `info(message)` method.
+        """
+        pattern_str = ''.join(self.pattern_list)
+        for i in range(self.num_iteration):
+            logging.info(pattern_str % self.get_metrics_index(i))
diff --git a/benchmark/scripts/models/lstm_synthetic.py b/benchmark/scripts/models/lstm_synthetic.py
@@ -38,7 +38,8 @@ def __init__(self):
         self.epochs = 4
         self.num_samples = 50000
 
-    def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False):
+    def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
+        self.epochs = epochs
         print("Running model ", self.test_name)
         keras.backend.set_learning_phase(True)
 

diff --git a/benchmark/scripts/models/lstm_text_generation.py b/benchmark/scripts/models/lstm_text_generation.py
@@ -42,7 +42,8 @@ def __init__(self, dataset_name=None):
         self.epochs = 60
         self.dataset_name = dataset_name
 
-    def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False):
+    def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
+        self.epochs = epochs
         print("Running model ", self.test_name)
         keras.backend.set_learning_phase(True)
 

diff --git a/benchmark/scripts/models/resnet50_benchmark.py b/benchmark/scripts/models/resnet50_benchmark.py
@@ -31,7 +31,8 @@ def __init__(self):
         self.num_samples = 1000
         self.test_type = 'tf.keras, eager_mode'
 
-    def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False):
+    def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
+        self.epochs = epochs
         print("Running model ", self.test_name)
         keras.backend.set_learning_phase(True)
 

diff --git a/benchmark/scripts/models/resnet50_benchmark_tf_keras.py b/benchmark/scripts/models/resnet50_benchmark_tf_keras.py
@@ -38,7 +38,8 @@ def __init__(self):
         self.num_samples = 1000
         self.test_type = 'tf.keras, eager_mode'
 
-    def run_benchmark(self, gpus=0, use_dataset_tensors=False):
+    def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
+        self.epochs = epochs
         print("Running model ", self.test_name)
         # tfe.enable_eager_execution()
         tf.keras.backend.set_learning_phase(True)

diff --git a/benchmark/scripts/models/timehistory.py b/benchmark/scripts/models/timehistory.py
@@ -8,11 +8,94 @@
 
 
 class TimeHistory(keras.callbacks.Callback):
-    def on_train_begin(self, logs={}):
+    """Callback that records the wall-clock duration of every epoch.
+
+    The durations (seconds, in `self.times`) back the formatted
+    per-epoch time, time-per-step and throughput getters below.
+
+    The per-step and throughput figures divide by `params['samples']`
+    when it is present and by `params['steps']` otherwise, so their
+    'step' / 'samples' unit labels are nominal.
+    """
+
+    def __init__(self):
+        super(TimeHistory, self).__init__()
+        self.times = []
+
+    def on_train_begin(self, logs=None):
         self.times = []
 
-    def on_epoch_begin(self, batch, logs={}):
+    def on_epoch_begin(self, batch, logs=None):
         self.epoch_time_start = time.time()
 
-    def on_epoch_end(self, batch, logs={}):
+    def on_epoch_end(self, batch, logs=None):
         self.times.append(time.time() - self.epoch_time_start)
+
+    def get_num_samples(self):
+        """Return `params['samples']`, else `params['steps']`.
+
+        # Raises
+            ValueError: if `self.params` has neither key.
+        """
+        if 'samples' in self.params:
+            return self.params['samples']
+        elif 'steps' in self.params:
+            return self.params['steps']
+        else:
+            raise ValueError('Incorrect metric parameter')
+
+    def __reformat(self, var):
+        """Scale a duration in seconds to a `(text, unit)` pair.
+
+        `unit` is 'sec', 'msec' or 'usec'; `text` is the scaled value
+        with two decimals and a trailing space.
+        """
+        if var >= 1:
+            var = '%.2f ' % var
+            time_format = 'sec'
+        elif var >= 1e-3:
+            var = '%.2f ' % (var * 1e3)
+            time_format = 'msec'
+        else:
+            var = '%.2f ' % (var * 1e6)
+            time_format = 'usec'
+        return var, time_format
+
+    def get_time_step(self):
+        """Return each epoch's duration divided by the sample count."""
+        num_samples = self.get_num_samples()
+        time_list = []
+        for t in self.times:
+            step_time, time_format = self.__reformat(t / num_samples)
+            time_list.append(step_time + time_format + '/step')
+        return time_list
+
+    def get_total_time(self):
+        """Return the formatted sum of all epoch durations."""
+        total_time, time_format = self.__reformat(sum(self.times))
+        return total_time + time_format
+
+    def get_time(self):
+        """Return each epoch's duration as a formatted string."""
+        time_list = []
+        for t in self.times:
+            # Named `epoch_time` so the `time` module is not shadowed.
+            epoch_time, time_format = self.__reformat(t)
+            time_list.append(epoch_time + time_format)
+        return time_list
+
+    def get_speed(self):
+        """Return each epoch's throughput as '<rate> samples/sec'.
+
+        The rate is formatted directly instead of through `__reformat`:
+        that helper scales small values *up* for msec/usec, which would
+        turn 0.5 samples/sec into '500.00 samples/msec'.
+        """
+        num_samples = self.get_num_samples()
+        samples_list = []
+        for t in self.times:
+            # A coarse clock can report a zero duration; avoid dividing
+            # by zero and report an infinite rate instead.
+            sample_sec = num_samples / t if t > 0 else float('inf')
+            samples_list.append('%.2f samples/sec' % sample_sec)
+        return samples_list
diff --git a/benchmark/scripts/run_benchmark.py b/benchmark/scripts/run_benchmark.py
@@ -29,6 +29,8 @@
                          'uploading metrics to BigQuery. This is useful when '
                          'you are testing new models and do not want data '
                          'corruption.')
+parser.add_argument('--epochs',
+                    help='Number of epochs')
 
 args = parser.parse_args()
 
@@ -41,6 +43,11 @@
     if args.inference == 'True':
         inference = True
 
+# `--epochs` is parsed as a string, so it is validated by hand here.
+if args.epochs and not args.epochs.isdigit():
+    print('error: invalid int value: ', args.epochs)
+    sys.exit(1)  # non-zero status so callers can detect the failure
+
 # Load the json config file for the requested mode.
 config_file = open(args.pwd + "/config.json", 'r')
 config_contents = config_file.read()
@@ -58,6 +65,9 @@ def get_backend_version():
 model = model_config.get_model_config(args.model_name)
 
 use_dataset_tensors = False
-model.run_benchmark(gpus=config['gpus'], inference=inference, use_dataset_tensors=use_dataset_tensors)
+# Pass `epochs` only when it was supplied so the model's own default applies otherwise.
+extra_args = {'epochs': int(args.epochs)} if args.epochs else {}
+model.run_benchmark(gpus=config['gpus'], inference=inference,
+                    use_dataset_tensors=use_dataset_tensors, **extra_args)
 if args.dry_run:
     print("Model :total_time", model.test_name, model.total_time)
diff --git a/benchmark/scripts/run_mxnet_backend.sh b/benchmark/scripts/run_mxnet_backend.sh
@@ -16,6 +16,6 @@ models='resnet50'
 dir=`pwd`
 for name in $models
 do
-  python $dir/run_benchmark.py  --pwd=$dir --mode="$1" --model_name="$name" --dry_run=True --inference="$2"
+  python $dir/run_benchmark.py  --pwd=$dir --mode="$1" --model_name="$name" --dry_run=True --inference="$2" --epochs="$3"
 done
-#!/usr/bin/env bash
+#!/usr/bin/env bash
diff --git a/benchmark/scripts/run_tf_backend.sh b/benchmark/scripts/run_tf_backend.sh
@@ -16,6 +16,6 @@ models='resnet50'
 dir=`pwd`
 for name in $models
 do
-  python $dir/run_benchmark.py --pwd=$dir --mode="$1" --model_name="$name" --dry_run=True --inference="$2"
+  python $dir/run_benchmark.py --pwd=$dir --mode="$1" --model_name="$name" --dry_run=True --inference="$2" --epochs="$3"
 done
 #!/usr/bin/env bash
diff --git a/benchmark/scripts/run_tf_keras_backend.sh b/benchmark/scripts/run_tf_keras_backend.sh
@@ -16,6 +16,6 @@ models='resnet50_tf_keras'
 dir=`pwd`
 for name in $models
 do
-  python $dir/run_benchmark.py  --pwd=$dir --mode="$1" --model_name="$name" --dry_run=True
+  python $dir/run_benchmark.py  --pwd=$dir --mode="$1" --model_name="$name" --dry_run=True --inference="$2" --epochs="$3"
 done
-#!/usr/bin/env bash
+#!/usr/bin/env bash