diff --git a/egs/fisher_english/s5/local/chain/compare_wer_general.py b/egs/fisher_english/s5/local/chain/compare_wer_general.py new file mode 100755 index 00000000000..e3a2dc5417a --- /dev/null +++ b/egs/fisher_english/s5/local/chain/compare_wer_general.py @@ -0,0 +1,239 @@ +#! /usr/bin/env python + +import argparse +import collections +import os +import re +import sys + +sys.path.insert(0, 'steps') +import libs.common as common_lib + +from collections import defaultdict + +def get_args(): + parser = argparse.ArgumentParser( + description=""" +This script is used for comparing decoding results between systems. +e.g. local/chain/compare_wer_general.py exp/chain_cleaned/tdnn_{c,d}_sp +For use with discriminatively trained systems you specify the epochs after a colon: +for instance, +local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_c_sp exp/chain_cleaned/tdnn_c_sp_smbr:{1,2,3} +""") + + parser.add_argument("--separator", type=str, default=" ", + help="Separator for different fields") + parser.add_argument("--print-fine-details", action='store_true', + help="Add fine details of insertions, substitutions " + "and deletions.") + parser.add_argument("--include-looped", action='store_true', + help="Used to include looped results") + parser.add_argument("--field-size", type=int, + help="Field size for the models") + parser.add_argument("systems", nargs='+') + + args = parser.parse_args() + return args + + +def parse_system_string(system_string): + parts = system_string.split(":") + if len(parts) not in [1, 2, 3]: + raise RuntimeError("Unable to parse system string {0}" + "".format(system_string)) + + dir_name = parts[0] + + suffix = "" + if len(parts) > 1: + suffix = parts[1] + + model_name = os.path.basename(dir_name) + if len(parts) > 2: + model_name = parts[2] + + return (dir_name, suffix, model_name) + + +class SystemInfo(object): + def __init__(self, dir_name, suffix, model_name): + self.dir_name = dir_name + self.suffix = suffix + self.model_name = model_name + self.iter_ = "final" + + if self.suffix != "": + m = re.search("_iter(\d+)", suffix) + if bool(m): + self.iter_ = m.group(1) + else: + used_epochs = False + + self.probs = [] + self.wers = defaultdict(lambda: "NA") + self.ins = defaultdict(lambda: "NA") + self.dels = defaultdict(lambda: "NA") + self.sub = defaultdict(lambda: "NA") + + def add_wer(self, dev_set, affix=""): + decode_name = dev_set + self.suffix + + out = common_lib.get_command_stdout( + "grep WER {dir_name}/decode{affix}_{decode_name}/wer* | utils/best_wer.sh" + "".format(dir_name=self.dir_name, affix=affix, + decode_name=decode_name), + require_zero_status=False) + + if out != "" and len(out.split()) >= 2: + self.wers[(dev_set, affix)] = out.split()[1] + self.ins[(dev_set, affix)] = out.split()[6] + self.dels[(dev_set, affix)] = out.split()[8] + self.sub[(dev_set, affix)] = out.split()[10] + + def _get_prob(self, set_="train", xent=False): + + if not os.path.exists( + "{dir_name}/log/compute_prob_{set}.{iter}.log" + "".format(dir_name=self.dir_name, set=set_, iter=self.iter_)): + return "NA" + + out = common_lib.get_command_stdout( + "grep Overall {dir_name}/log/compute_prob_{set}.{iter}.log | " + "grep {opt} xent".format(dir_name=self.dir_name, set=set_, + iter=self.iter_, + opt="-w" if xent else "-v"), + require_zero_status=False) + + if out == "": + return "NA" + + lines = out.split("\n") + prob = None + + affix = "-xent" if xent else "" + for line in lines: + if (bool(re.search(r"'output-0{0}'".format(affix), line)) + or 
bool(re.search(r"'output{0}'".format(affix), line))): + prob = float(line.split()[7]) + break + + return "NA" if prob is None else "{0:.4f}".format(prob) + + def add_probs(self): + self.probs.append(self._get_prob(set_="train", xent=False)) + self.probs.append(self._get_prob(set_="valid", xent=False)) + self.probs.append(self._get_prob(set_="train", xent=True)) + self.probs.append(self._get_prob(set_="valid", xent=True)) + + +def run(args): + used_epochs = False + systems = [] + for sys_string in args.systems: + dir_name, suffix, model_name = parse_system_string(sys_string) + info = SystemInfo(dir_name, suffix, model_name) + + if suffix != "" and re.search("epoch", suffix): + used_epochs = True + else: + used_epochs = False + + for dev_set in ["dev", "test"]: + info.add_wer(dev_set) + + if args.include_looped: + info.add_wer(dev_set, affix="_looped") + + if not used_epochs: + info.add_probs() + + systems.append(info) + + print_system_infos(args, systems, used_epochs) + + +def print_system_infos(args, system_infos, used_epochs=False): + field_sizes = [args.field_size] * len(system_infos) + + if args.field_size is None: + for i, x in enumerate(system_infos): + field_sizes[i] = len(x.model_name) + + separator = args.separator + print ("# {0: <25}{sep}{1}".format( + "System", + "{sep}".format(sep=args.separator).join( + ["{0: <{1}}".format(x.model_name, field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + + tups = set() + for sys_info in system_infos: + for tup in sys_info.wers: + tups.add(tup) + + for tup in sorted(list(tups)): + dev_set, affix = tup + print ("# {0: <25}{sep}{1}".format( + "WER on {0} {1}" + "".format(dev_set, "[ "+affix+" ]" if affix != "" else ""), + "{sep}".format(sep=args.separator).join( + ["{0: <{1}}".format(x.wers[tup], field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + if args.print_fine_details: + print ("# {0: <25}{sep}{1}".format( + "#Ins on {0} {1}" + "".format(dev_set, "[ "+affix+" ]" if affix != "" else ""), + "{sep}".format(sep=args.separator).join( + ["{0: <{1}}".format(x.ins[tup], field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + print ("# {0: <25}{sep}{1}".format( + "#Del on {0} {1}" + "".format(dev_set, "[ "+affix+" ]" if affix != "" else ""), + "{sep}".format(sep=args.separator).join( + ["{0: <{1}}".format(x.dels[tup], field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + print ("# {0: <25}{sep}{1}".format( + "#Sub on {0} {1}" + "".format(dev_set, "[ "+affix+" ]" if affix != "" else ""), + "{sep}".format(sep=args.separator).join( + ["{0: <{1}}".format(x.sub[tup], field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + + if not used_epochs: + print ("# {0: <25}{sep}{1}".format( + "Final train prob", + "{sep}".format(sep=args.separator).join( + ["{0: <{1}}".format(x.probs[0], field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + + print ("# {0: <25}{sep}{1}".format( + "Final valid prob", + "{sep}".format(sep=args.separator).join( + ["{0: <{1}}".format(x.probs[1], field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + + print ("# {0: <25}{sep}{1}".format( + "Final train prob (xent)", + "{sep}".format(sep=args.separator).join( + ["{0: <{1}}".format(x.probs[2], field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + + print ("# {0: <25}{sep}{1}".format( + "Final valid prob (xent)", + "{sep}".format(sep=args.separator).join( + ["{0: 
<{1}}".format(x.probs[3], field_sizes[i]) + for i, x in enumerate(system_infos)]), + sep=args.separator)) + + +if __name__ == "__main__": + args = get_args() + run(args) diff --git a/egs/fisher_english/s5/local/chain/confidence_calibration.sh b/egs/fisher_english/s5/local/chain/confidence_calibration.sh new file mode 100755 index 00000000000..34a487085aa --- /dev/null +++ b/egs/fisher_english/s5/local/chain/confidence_calibration.sh @@ -0,0 +1,88 @@ +#!/bin/bash +. cmd.sh +. path.sh + +chaindir=exp/chain_semi350k_conf/tdnn_xxsup1a_sp +arpa_gz=data/local/lm_ex250k/3gram-mincount/lm_unpruned.gz +graph_affix=_ex250k +decode_affix= +train_set=train_sup_5k_calib_train +dev_set=dev_sup_5k_calib_dev + +. utils/parse_options.sh + +set -euxo pipefail + +train_data=data/${train_set}_hires +dev_data=data/${dev_set}_hires + +decode_affix=${decode_affix}${graph_affix} +graphdir=$chaindir/graph${graph_affix} +train_caldir=$chaindir/decode_${train_set}${decode_affix}/confidence +dev_caldir=$chaindir/decode_${dev_set}${decode_affix}/confidence + +###### Data preparation, + +# Prepare filtering for excluding data from train-set (1 .. keep word, 0 .. exclude word), +# - only excludes from training-targets, the confidences are recalibrated for all the words, +word_filter=$(mktemp) +awk '{ keep_the_word = $1 !~ /^(\[.*\]|<.*>|%.*|!.*|-.*|.*-)$/; print $0, keep_the_word }' \ + $graphdir/words.txt >$word_filter + +# Calcualte the word-length, +word_length=$(mktemp) +awk '{if(r==0) { len_hash[$1] = NF-2; } + if(r==1) { if(len_hash[$1]) { len = len_hash[$1]; } else { len = -1 } + print $0, len; }}' \ + r=0 $graphdir/phones/align_lexicon.txt \ + r=1 $graphdir/words.txt \ + >$word_length + +# Extract unigrams, +unigrams=$(mktemp); steps/conf/parse_arpa_unigrams.py $graphdir/words.txt $arpa_gz $unigrams + +###### Paste the 'word-specific' features (first 4 columns have fixed position, more feature-columns can be added), +# Format: "word word_id filter length other_features" +word_feats=$(mktemp) +paste $word_filter <(awk '{ print $3 }' $word_length) <(awk '{ print $3 }' $unigrams) > $word_feats + + +###### Train the calibration, +steps/conf/train_calibration.sh --cmd "$decode_cmd" --lmwt 10 \ + $train_data $graphdir $word_feats \ + $chaindir/decode_${train_set}${decode_affix} $train_caldir + +###### Apply the calibration to eval set, +steps/conf/apply_calibration.sh --cmd "$decode_cmd" \ + $dev_data $graphdir $chaindir/decode_${dev_set}${decode_affix} \ + $train_caldir $dev_caldir +# The final confidences are here '$eval_caldir/ctm_calibrated', + +exit 0 + +###### Sclite scoring, +# We will produce NCE which shows the ``quality'' of the confidences. +# Please compare with the default scoring script for your database. 
+ +# Scoring tools, +hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl +hubdir=`dirname $hubscr` + +# Inputs, +ctm=$dev_caldir/ctm_calibrated +stm=$dev_data/stm +glm=$dev_data/glm + +# Normalizing CTM, just like in 'local/score_sclite.sh', +cat $ctm | grep -i -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ + grep -i -v -E '<UNK>' | \ + grep -i -v -E ' (UH|UM|EH|MM|HM|AH|HUH|HA|ER|OOF|HEE|ACH|EEE|EW) ' | \ + awk '$5 !~ /^.*-$/' | \ + local/map_acronyms_ctm.py -M data/local/dict_nosp/acronyms.map -i - -o ${ctm}.filt + +# Mapping the time info to global, +utils/convert_ctm.pl $dev_data/segments $dev_data/reco2file_and_channel <${ctm}.filt >${ctm}.filt.conv + +# Scoring, +$hubscr -p $hubdir -V -l english -h hub5 -g $glm -r $stm ${ctm}.filt.conv + diff --git a/egs/fisher_english/s5/local/chain/run_semisupervised.sh b/egs/fisher_english/s5/local/chain/run_semisupervised.sh new file mode 100755 index 00000000000..77ae92e49b6 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/run_semisupervised.sh @@ -0,0 +1,143 @@ +#!/bin/bash + +set -e -o pipefail + +stage=-2 +nj=30 +decode_nj=30 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k # affix relating train-set splitting proportion + +tdnn_affix=_sup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# combination options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the combined egs dir +comb_affix=_comb1a # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +unsup_egs_weight=1.0 +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +left_tolerance=2 +right_tolerance=2 +train_combined_opts="--num-epochs 4.5" +graph_affix= # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= +# to tune: +# frames_per_eg for unsupervised + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} + +if ! 
cuda-compiled; then + cat <$n1?$n2:$n1)) + num_archives=$[num_archives*3/2] + mkdir -p $comb_egs_dir/log + cp {$sup_egs_dir,$comb_egs_dir}/train_diagnostic.cegs + cp {$sup_egs_dir,$comb_egs_dir}/valid_diagnostic.cegs + cp {$sup_egs_dir,$comb_egs_dir}/combine.cegs + cp {$sup_egs_dir,$comb_egs_dir}/cmvn_opts + cp -r $sup_egs_dir/info $comb_egs_dir + echo $num_archives > $comb_egs_dir/info/num_archives + cat {$sup_egs_dir,$unsup_egs_dir}/info/num_frames | awk '{s+=$1} END{print s}' > $comb_egs_dir/info/num_frames + cat {$sup_egs_dir,$unsup_egs_dir}/info/egs_per_archive | awk '{s+=$1} END{print s}' > $comb_egs_dir/info/egs_per_archive + out_egs_list= + egs_list= + for n in $(seq $num_archives); do + [ -f $sup_egs_dir/cegs.$n.ark ] && egs_list="$egs_list $sup_egs_dir/cegs.$n.ark" + [ -f $unsup_egs_dir/cegs.$n.ark ] && egs_list="$egs_list $unsup_egs_dir/cegs.$n.ark" + out_egs_list="$out_egs_list ark:$comb_egs_dir/cegs.$n.ark" + done + srand=0 + $decode_cmd $comb_egs_dir/log/combine.log \ + nnet3-chain-copy-egs "ark:cat $egs_list|" $out_egs_list +fi + +if [ $stage -le 3 ]; then + echo "$0: training on the supervised+unsupervised subset" + # the train-set and gmm do not matter as we are providing the egs + local/chain/run_tdnn.sh --stage 12 --remove-egs false --train-set $supervised_set \ + --nnet3-affix $nnet3_affix \ + --tdnn-affix ${tdnn_affix}${decode_affix}${egs_affix}${comb_affix} \ + --common-egs-dir $comb_egs_dir $train_combined_opts +fi diff --git a/egs/fisher_english/s5/local/chain/run_tdnn.sh b/egs/fisher_english/s5/local/chain/run_tdnn.sh index aefe920ce11..47177f422bf 100755 --- a/egs/fisher_english/s5/local/chain/run_tdnn.sh +++ b/egs/fisher_english/s5/local/chain/run_tdnn.sh @@ -29,18 +29,12 @@ tree_affix= nnet3_affix= xent_regularize=0.1 hidden_dim=725 -num_leaves=11000 # training options num_epochs=4 remove_egs=false common_egs_dir= minibatch_size=128 -num_jobs_initial=3 -num_jobs_final=16 -initial_effective_lrate=0.001 -final_effective_lrate=0.0001 -frames_per_iter=1500000 gmm=tri5a build_tree_ali_dir=exp/tri4a_ali # used to make a new tree for chain topology, should match train data @@ -106,7 +100,7 @@ if [ $stage -le 11 ]; then # Build a tree using our new topology. 
steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ --leftmost-questions-truncate -1 \ - --cmd "$train_cmd" $num_leaves $build_tree_train_data_dir $lang $build_tree_ali_dir $treedir || exit 1; + --cmd "$train_cmd" 11000 $build_tree_train_data_dir $lang $build_tree_ali_dir $treedir || exit 1; fi if [ $stage -le 12 ]; then @@ -175,12 +169,12 @@ if [ $stage -le 13 ]; then --egs.opts "--frames-overlap-per-eg 0" \ --egs.chunk-width 150 \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ + --trainer.frames-per-iter 1500000 \ --trainer.num-epochs $num_epochs \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ - --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.max-param-change 2.0 \ --cleanup.remove-egs $remove_egs \ --feat-dir $train_data_dir \ diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_b.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_b.sh new file mode 100644 index 00000000000..6254dd5d184 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_b.sh @@ -0,0 +1,198 @@ +#!/bin/bash +set -e + +# Based on run_tdnn_7b.sh in the fisher swbd recipe + +# configs for 'chain' +stage=0 +tdnn_affix=7b +train_stage=-10 +get_egs_stage=-10 +decode_iter= +train_set=train +tree_affix= +nnet3_affix= +gmm=tri5a +xent_regularize=0.1 +hidden_dim=725 + +# training options +num_epochs=4 +remove_egs=false +common_egs_dir= +minibatch_size=128 + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
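+  # (The positional argument 11000 below is the requested number of tree leaves, i.e. the number of context-dependent targets the chain model will predict.)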
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 11000 $build_tree_train_data_dir $lang $build_tree_ali_dir $treedir || exit 1; +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$common_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $treedir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +decode_suff= +if [ $stage -le 15 ]; then + iter_opts= + if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_a.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_a.sh new file mode 100755 index 00000000000..c5e0401c3e5 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_a.sh @@ -0,0 +1,145 @@ +#!/bin/bash + +set -e -o pipefail + +stage=-2 +nj=30 +decode_nj=30 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k # affix relating train-set splitting proportion + +tdnn_affix=_sup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# combination options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +comb_affix=_comb1a # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +unsup_egs_weight=1.0 +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +left_tolerance=2 +right_tolerance=2 +train_combined_opts="--num-epochs 4.5" +graph_affix= # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= +# to tune: +# frames_per_eg for unsupervised + +# End configuration section. 
+echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} + +if ! cuda-compiled; then + cat <$n1?$n2:$n1)) + num_archives=$[num_archives*3/2] + mkdir -p $comb_egs_dir/log + cp {$sup_egs_dir,$comb_egs_dir}/train_diagnostic.cegs + cp {$sup_egs_dir,$comb_egs_dir}/valid_diagnostic.cegs + cp {$sup_egs_dir,$comb_egs_dir}/combine.cegs + cp {$sup_egs_dir,$comb_egs_dir}/cmvn_opts + cp -r $sup_egs_dir/info $comb_egs_dir + echo $num_archives > $comb_egs_dir/info/num_archives + cat {$sup_egs_dir,$unsup_egs_dir}/info/num_frames | awk '{s+=$1} END{print s}' > $comb_egs_dir/info/num_frames + cat {$sup_egs_dir,$unsup_egs_dir}/info/egs_per_archive | awk '{s+=$1} END{print s}' > $comb_egs_dir/info/egs_per_archive + out_egs_list= + egs_list= + for n in $(seq $num_archives); do + [ -f $sup_egs_dir/cegs.$n.ark ] && egs_list="$egs_list $sup_egs_dir/cegs.$n.ark" + [ -f $unsup_egs_dir/cegs.$n.ark ] && egs_list="$egs_list $unsup_egs_dir/cegs.$n.ark" + out_egs_list="$out_egs_list ark:$comb_egs_dir/cegs.$n.ark" + done + srand=0 + $decode_cmd $comb_egs_dir/log/combine.log \ + nnet3-chain-copy-egs "ark:cat $egs_list|" $out_egs_list +fi + +if [ $stage -le 3 ]; then + echo "$0: training on the supervised+unsupervised subset" + # the train-set and gmm do not matter as we are providing the egs + local/chain/run_tdnn.sh --stage 12 --remove-egs false --train-set $supervised_set \ + --nnet3-affix $nnet3_affix \ + --tdnn-affix ${tdnn_affix}${decode_affix}${egs_affix}${comb_affix} \ + --common-egs-dir $comb_egs_dir $train_combined_opts +fi diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_b.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_b.sh new file mode 100755 index 00000000000..0c12140c8c7 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_b.sh @@ -0,0 +1,253 @@ +#!/bin/bash + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=30 +decode_nj=30 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix=_ex250k +egs_affix=_prun2_lmwt0_tol2 # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +left_tolerance=2 +right_tolerance=2 +graph_affix= # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +common_egs_dir= + +# Semi-supervised options +comb_affix=_comb1b2 # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +tree_affix= +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 + +decode_iter= +# to tune: +# frames_per_eg for unsupervised + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
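+  # (For example, with the default xent_regularize=0.1 above, this gives a learning-rate factor of 0.5 / 0.1 = 5.)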
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $stage -le 13 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$common_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_sp_hires \ + --tree-dir $treedir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +decode_suff= +if [ $stage -le 15 ]; then + iter_opts= + if [ ! -z "$decode_iter" ]; then + iter_opts=" --iter $decode_iter " + else + decode_iter=final + fi + + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output;" $dir/$decode_iter.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/$decode_iter.mdl $dir/${decode_iter}-output.mdl + + iter_opts=" --iter ${decode_iter}-output " + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; + ) & + done +fi +wait; +exit 0; diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_a.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_a.sh new file mode 100644 index 00000000000..4a0b5f1dd26 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_a.sh @@ -0,0 +1,331 @@ +#!/bin/bash + +# This script is the baseline with unsupervised egs in multilingual recipe. 
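+# (Here the supervised and unsupervised egs are combined with steps/nnet3/multilingual/combine_egs.sh and the network has two chain outputs; output-0 is presumably trained on the supervised egs and output-1 on the unsupervised egs.)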
+# lattice_lm_scale=0.0 +# lattice_prune_beam=2.0 +# tolerance=2 +# unsup_frames_per_eg=150 +# Deriv weights: None +# Unsupervised weight: 0.5 +# Unsupervised weight for phone LM: 0 + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1a # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 + +tree_affix= +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! cuda-compiled; then + cat < $conf_dir/weights.scp +fi + +left_context=`cat $chaindir/egs/info/left_context` +right_context=`cat $chaindir/egs/info/right_context` +left_context_initial=`cat $chaindir/egs/info/left_context_initial` +right_context_final=`cat $chaindir/egs/info/right_context_final` + +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=`cat $chaindir/egs/info/frames_per_eg` +frame_subsampling_factor=`cat $chaindir/frame_subsampling_factor` +cmvn_opts=`cat $chaindir/cmvn_opts` + +unsup_egs_dir=$chaindir/unsup_egs${decode_affix}${egs_affix} + +if [ $stage -le 9 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
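+  # Note: unlike the supervised egs, these examples are built directly from the decode lattices: --alignment-subsampling-factor 1 is used because the lattices are already at the chain output frame rate, --lattice-lm-scale and --lattice-prune-beam control how the lattice scores are kept/pruned, and --deriv-weights-scp supplies per-frame derivative weights from $conf_dir/weights.scp.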
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $conf_dir/weights.scp \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ + data/${unsupervised_set}_hires $chaindir \ + ${chaindir}/decode_${unsupervised_set}${decode_affix} $unsup_egs_dir +fi + +sup_egs_dir=$chaindir/egs_scp +comb_egs_dir=$chaindir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 10 ]; then + + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --samples-per-iter 10000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +treedir=exp/chain${nnet3_affix}/tree_${tree_affix} +lat_dir=exp/chain${nnet3_affix}/tri5a_${supervised_set}_sp_lats # not required since egs is given. +dir=exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $stage -le 13 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_sp_hires \ + --tree-dir $treedir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 15 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_c.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_c.sh new file mode 100644 index 00000000000..0564bf693ab --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_c.sh @@ -0,0 +1,380 @@ +#!/bin/bash + +# This script is similar to _a but uses denominator FST created using +# LM estimated on supervised + unsupervised set phone sequences +# and deriv weights from calibrated confidences. +# lattice_lm_scale=0.0 +# lattice_prune_beam=2.0 +# tolerance=2 +# unsup_frames_per_eg=150 +# Deriv weights: None +# Unsupervised weight: 0.5 +# Unsupervised weight for phone LM: 2/3 + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=_comb1c # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=3,2 + +tree_affix= +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $conf_dir/weights.scp +fi + +left_context=`cat $chaindir/egs/info/left_context` +right_context=`cat $chaindir/egs/info/right_context` +left_context_initial=`cat $chaindir/egs/info/left_context_initial` +right_context_final=`cat $chaindir/egs/info/right_context_final` + +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=`cat $chaindir/egs/info/frames_per_eg` + +frame_subsampling_factor=1 +if [ -f $chaindir/frame_subsampling_factor ]; then + frame_subsampling_factor=`cat $chaindir/frame_subsampling_factor` +fi +cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 + +treedir=exp/chain${nnet3_affix}/tree_${tree_affix} +dir=exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix} + +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +if [ $stage -le 9 ]; then + false && $decode_cmd JOB=1:$(cat $unsup_lat_dir/num_jobs) \ + ${chaindir}/best_path_${unsupervised_set}${decode_affix}/log/get_best_path.JOB.log \ + lattice-best-path --acoustic-scale=1.0 \ + "ark:gunzip -c $unsup_lat_dir/lat.JOB.gz |" ark:/dev/null \ + "ark:| gzip -c > ${chaindir}/best_path_${unsupervised_set}${decode_affix}/ali.JOB.gz" + + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +supervised_set=${supervised_set}_sp +sup_lat_dir=exp/chain${nnet3_affix}/tri5a_${supervised_set}_lats +sup_egs_dir=$dir/egs_${supervised_set} + +if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $(cat $chaindir/egs/info/frames_per_eg) \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir +fi + +unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
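+  # Note: --generate-egs-scp true (here and for the supervised egs above) writes scp listings of the examples, which the steps/nnet3/multilingual/combine_egs.sh call in the next stage expects in order to mix the two egs dirs with weights $supervision_weights.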
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $conf_dir/weights.scp \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $chaindir \ + $unsup_lat_dir $unsup_egs_dir +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 12 ]; then + + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --samples-per-iter 10000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $stage -le 13 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -2 ]; then + train_stage=-2 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_d.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_d.sh new file mode 100644 index 00000000000..572a3f8466e --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_d.sh @@ -0,0 +1,298 @@ +#!/bin/bash + +# This script is similar to _a but uses deriv weights from lattice-posteriors. +# lattice_lm_scale=0.0 +# lattice_prune_beam=2.0 +# tolerance=2 +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posteriors (Bug when originally run) +# Unsupervised weight: 0.5 +# Unsupervised weight for phone LM: 0 + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1d # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 + +tree_affix= +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $stage -le 13 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 15 ]; then + iter_opts= + if [ ! -z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_e.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_e.sh new file mode 100644 index 00000000000..24734d216e2 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_e.sh @@ -0,0 +1,396 @@ +#!/bin/bash + +# This script is similar to _c but re-creates supervised egs using new +# normalization FST. 
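+# (i.e. the supervised egs are dumped in stage 10 with this experiment's
+# directory as the chain dir, so get_egs.sh picks up the normalization FST
+# built in stage 9 from the weighted supervised+unsupervised phone-LM
+# combination, rather than the normalization FST of the baseline supervised
+# chain directory.)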
+# lattice_lm_scale=0.0 +# lattice_prune_beam=2.0 +# tolerance=2 +# unsup_frames_per_eg=150 +# Deriv weights: None +# Unsupervised weight: 0.5 +# Unsupervised weight for phone LM: 2/3 + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +sup_egs_dir= +comb_affix=comb1e # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=3,2 + +tree_affix= +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $conf_dir/weights.scp +fi + +left_context=`cat $chaindir/egs/info/left_context` +right_context=`cat $chaindir/egs/info/right_context` +left_context_initial=`cat $chaindir/egs/info/left_context_initial` +right_context_final=`cat $chaindir/egs/info/right_context_final` + +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=`cat $chaindir/egs/info/frames_per_eg` + +frame_subsampling_factor=1 +if [ -f $chaindir/frame_subsampling_factor ]; then + frame_subsampling_factor=`cat $chaindir/frame_subsampling_factor` +fi +cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 + +treedir=exp/chain${nnet3_affix}/tree_${tree_affix} +dir=exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +if [ $stage -le 9 ]; then + false && $decode_cmd JOB=1:$(cat $unsup_lat_dir/num_jobs) \ + ${chaindir}/best_path_${unsupervised_set}${decode_affix}/log/get_best_path.JOB.log \ + lattice-best-path --acoustic-scale=1.0 \ + "ark:gunzip -c $unsup_lat_dir/lat.JOB.gz |" ark:/dev/null \ + "ark:| gzip -c > ${chaindir}/best_path_${unsupervised_set}${decode_affix}/ali.JOB.gz" + + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +supervised_set=${supervised_set}_sp +sup_lat_dir=exp/chain${nnet3_affix}/tri5a_${supervised_set}_lats + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + + left_context=`cat $chaindir/egs/info/left_context` + right_context=`cat $chaindir/egs/info/right_context` + left_context_initial=`cat $chaindir/egs/info/left_context_initial` + right_context_final=`cat $chaindir/egs/info/right_context_final` + frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) + + if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + left_context=`cat $sup_egs_dir/info/left_context` + right_context=`cat $sup_egs_dir/info/right_context` + left_context_initial=`cat $sup_egs_dir/info/left_context_initial` + right_context_final=`cat $sup_egs_dir/info/right_context_final` + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg +unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $conf_dir/weights.scp \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $chaindir \ + $unsup_lat_dir $unsup_egs_dir +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 12 ]; then + + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --samples-per-iter 10000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $stage -le 13 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_f.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_f.sh new file mode 100644 index 00000000000..faef0c70546 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_f.sh @@ -0,0 +1,347 @@ +#!/bin/bash + +# This script is similar to _e but uses deriv weights from lattice-posteriors +# instead of from calibrated confidences. +# But there is a minor bug in creating the lattice posteriors when this +# script was run. An acwt of 1.0 was used for lattice-best-path when it +# should have been 0.1. +# lattice_lm_scale=0.0 +# lattice_prune_beam=2.0 +# tolerance=2 +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posteriors (Bug when originally run) +# Unsupervised weight: 0.5 +# Unsupervised weight for phone LM: 2/3 + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +sup_egs_dir= +comb_affix=comb1f # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=3,2 + +tree_affix= +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
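+  # Note: output-0, output-1 and output each get their own affine component
+  # from this xconfig, but the perl edit applied to final.config further down
+  # re-points output-1 (and output-1-xent) at the output-0 affine components,
+  # so the two outputs end up sharing parameters.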
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_g.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_g.sh new file mode 100644 index 00000000000..9dbca030174 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_g.sh @@ -0,0 +1,383 @@ +#!/bin/bash + +# This script is same as _e but uses a weight of 1.0 for unsupervised egs. +# lattice_lm_scale=0.0 +# lattice_prune_beam=2.0 +# tolerance=2 +# unsup_frames_per_eg=150 +# Deriv weights: None +# Unsupervised weight: 1.0 +# Unsupervised weight for phone LM: 2/3 + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1g # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,1.0 +lm_weights=3,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $conf_dir/weights.scp +fi + +frame_subsampling_factor=1 +if [ -f $chaindir/frame_subsampling_factor ]; then + frame_subsampling_factor=`cat $chaindir/frame_subsampling_factor` +fi +cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 + +treedir=exp/chain${nnet3_affix}/tree_${tree_affix} +dir=exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +if [ $stage -le 9 ]; then + $decode_cmd JOB=1:$(cat $unsup_lat_dir/num_jobs) \ + ${chaindir}/best_path_${unsupervised_set}${decode_affix}/log/get_best_path.JOB.log \ + lattice-best-path --acoustic-scale=1.0 \ + "ark:gunzip -c $unsup_lat_dir/lat.JOB.gz |" ark:/dev/null \ + "ark:| gzip -c > ${chaindir}/best_path_${unsupervised_set}${decode_affix}/ali.JOB.gz" + + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +supervised_set=${supervised_set}_sp +sup_lat_dir=exp/chain${nnet3_affix}/tri5a_${supervised_set}_lats + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + left_context=`cat $sup_egs_dir/info/left_context` + right_context=`cat $sup_egs_dir/info/right_context` + left_context_initial=`cat $sup_egs_dir/info/left_context_initial` + right_context_final=`cat $sup_egs_dir/info/right_context_final` + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg + +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
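+    # The unsupervised egs are dumped with the same context and
+    # frame-subsampling settings as the supervised egs (held in the variables
+    # set above), so that the two egs directories can later be combined by
+    # combine_egs.sh.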
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $conf_dir/weights.scp \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $chaindir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 12 ]; then + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --samples-per-iter 10000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $stage -le 13 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_h.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_h.sh new file mode 100644 index 00000000000..866f310c0ed --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_h.sh @@ -0,0 +1,348 @@ +#!/bin/bash + +# This script is same as _g, but uses deriv weights from lattice posteriors +# instead of calibrated confidences. But there was a bug when running this +# script. (An acwt of 1.0 was used for lattice-best-path instead of 0.1) +# lattice_lm_scale=0.0 +# lattice_prune_beam=2.0 +# tolerance=2 +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posteriors (Bug when originally run) +# Unsupervised weight: 0.5 +# Unsupervised weight for phone LM: 2/3 + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1h # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,1.0 +lm_weights=3,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
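+  # Note: the three chain output layers above (output, output-0, output-1) all
+  # share prefinal-chain; output-0 and output-1 are the outputs used for the
+  # combined supervised+unsupervised egs, and at decode time the script removes
+  # the plain 'output' node and renames output-0 to 'output' (see the
+  # nnet3-copy --edits command in the decode stage below).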
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_i.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_i.sh new file mode 100644 index 00000000000..69e29d600c9 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_i.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# This script is similar to _h, but uses unsup_frames_per_eg of 300. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg=300 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1i # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,1.0 +lm_weights=3,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_j.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_j.sh new file mode 100644 index 00000000000..6d98f9cf6da --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_j.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# This script is same as _k, but uses a weight of 0.5 for unsupervised egs. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg=300 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1j # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=3,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_k.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_k.sh new file mode 100644 index 00000000000..96d101ac2f2 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_k.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# This script is same as _f, but uses an lm-scale of 0.1. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.1 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1k # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=3,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
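+  # For example, with xent_regularize=0.1 as set at the top of this script,
+  # learning_rate_factor = 0.5 / 0.1 = 5.0, so the xent output layers below
+  # are updated roughly five times faster than the rest of the network.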
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_l.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_l.sh new file mode 100644 index 00000000000..371bfcfc1b6 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_l.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# This script is same as _f, but uses an lm-scale of 0.5. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1l # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=3,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
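+  # Note: output-0 and output-1 presumably serve the supervised and unsupervised
+  # egs streams that are combined for training; at decode time the nnet3-copy
+  # --edits command further below removes the plain 'output' node and renames
+  # 'output-0' to 'output'.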
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_m.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_m.sh new file mode 100644 index 00000000000..b608e77e309 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_m.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# This script is same as _f, but fixes the bug about acwt for best path. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1m # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=3,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
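+  # Note: after xconfig_to_configs.py generates final.config, the perl
+  # substitution below rewrites component=output-1.affine (and its -xent
+  # counterpart) to component=output-0.affine, so both output heads end up
+  # sharing the same affine parameters.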
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_n.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_n.sh new file mode 100644 index 00000000000..b463ed56485 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_n.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# This script is same as _c, but redone to be consistent with _m. +# So it does not have any deriv weights. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1n # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=3,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
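+  # Note: the chain output layers above set include-log-softmax=false because
+  # the 'chain' (LF-MMI) objective is computed on unnormalized outputs; the
+  # xent output layers below keep the default log-softmax.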
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_o.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_o.sh new file mode 100644 index 00000000000..b4e9e1e5faf --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_o.sh @@ -0,0 +1,341 @@ +#!/bin/bash + +# This script is same as _a, but re-done to be consistent with _m. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb350k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup +semi_affix=350k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1o # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.5 +lm_weights=1 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
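+  # Reminder on the descriptor notation used above: Append(-3,0,3) splices a
+  # layer's input at time offsets t-3, t and t+3, and ReplaceIndex(ivector, t, 0)
+  # makes every frame use the iVector from t=0.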
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_p.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_p.sh new file mode 100644 index 00000000000..7137523c843 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_p.sh @@ -0,0 +1,340 @@ +#!/bin/bash + +# This script is same as _f, but fixes the bug about acwt for best path. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb270k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train +semi_affix=270k_conf # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a_20k # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1p # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.3 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
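+  # Note: max-change=1.5 limits how far each output layer's parameters can move
+  # in a single minibatch, and target-rms=0.5 on the prefinal layers scales
+  # their activations down; these are the values typically used in chain recipes.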
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_q.sh b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_q.sh new file mode 100644 index 00000000000..cf12901f617 --- /dev/null +++ b/egs/fisher_english/s5/local/chain/tuning/run_tdnn_semisupervised_conf_q.sh @@ -0,0 +1,340 @@ +#!/bin/bash + +# This script is same as _f, but fixes the bug about acwt for best path. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=train_comb270k # for reference + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup_20k +semi_affix=270k_conf_pca # affix relating train-set splitting proportion + +tdnn_affix=_xxsup1a_20k # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1q # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.3 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_semi${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 14 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 16 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/fisher_create_test_lang.sh b/egs/fisher_english/s5/local/fisher_create_test_lang.sh index 1d7c4013b83..533a0949962 100755 --- a/egs/fisher_english/s5/local/fisher_create_test_lang.sh +++ b/egs/fisher_english/s5/local/fisher_create_test_lang.sh @@ -44,5 +44,7 @@ fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \ fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \ fstisstochastic || echo "[log:] LG is not stochastic" +utils/build_const_arpa_lm.sh data/local/lm/4gram-mincount/lm_unpruned.gz \ + data/lang_test data/lang_test_fg echo "$0 succeeded" diff --git a/egs/fisher_english/s5/local/fisher_train_lms.sh b/egs/fisher_english/s5/local/fisher_train_lms.sh index 881d3ce9466..585680550f8 100755 --- a/egs/fisher_english/s5/local/fisher_train_lms.sh +++ b/egs/fisher_english/s5/local/fisher_train_lms.sh @@ -70,6 +70,8 @@ cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1] train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; +train_lm.sh --arpa --lmtype 4gram-mincount $dir || exit 1; + # Perplexity over 88307.000000 words (excluding 691.000000 OOVs) is 71.241332 # note: output is diff --git a/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh index 6505381b03f..b1285de008f 100755 --- a/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh +++ b/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh @@ -6,8 +6,9 @@ stage=1 generate_alignments=true # false if doing chain training speed_perturb=true train_set=train - lda_train_set=train_100k +extractor= # ivector-extractor. + # If provided, will be used instead of training a new one. nnet3_affix= gmm=tri2_ali # should also contain alignments for $lda_train_set @@ -94,37 +95,42 @@ for line in sys.stdin.readlines(): steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir; utils/fix_data_dir.sh data/${dataset}_hires # remove segments with problems done - - # Take the first 30k utterances (about 1/8th of the data) this will be used - # for the diagubm training - utils/subset_data_dir.sh --first data/${train_set}_hires 30000 data/${train_set}_30k_hires - utils/data/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires # 33hr fi -# ivector extractor training -if [ $stage -le 4 ]; then - # We need to build a small system just because we need the LDA+MLLT transform - # to train the diag-UBM on top of. 
We use --num-iters 13 because after we get - # the transform (12th iter is the last), any further training is pointless. - # this decision is based on fisher_english - steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ - --splice-opts "--left-context=3 --right-context=3" \ - 5500 90000 data/${lda_train_set}_hires \ - data/lang $gmm_dir exp/nnet3${nnet3_affix}/tri3a -fi +if [ -z "$extractor" ]; then + if [ $stage -le 3 ]; then + # Take the first 30k utterances (about 1/8th of the data) this will be used + # for the diagubm training + utils/subset_data_dir.sh --first data/${train_set}_hires 30000 data/${train_set}_30k_hires + utils/data/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires # 33hr + fi -if [ $stage -le 5 ]; then - # To train a diagonal UBM we don't need very much data, so use the smallest subset. - steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ - data/${train_set}_30k_nodup_hires 512 exp/nnet3${nnet3_affix}/tri3a exp/nnet3${nnet3_affix}/diag_ubm -fi + # ivector extractor training + if [ $stage -le 4 ]; then + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. We use --num-iters 13 because after we get + # the transform (12th iter is the last), any further training is pointless. + # this decision is based on fisher_english + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --splice-opts "--left-context=3 --right-context=3" \ + 5500 90000 data/${lda_train_set}_hires \ + data/lang $gmm_dir exp/nnet3${nnet3_affix}/tri3a + fi + + if [ $stage -le 5 ]; then + # To train a diagonal UBM we don't need very much data, so use the smallest subset. + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ + data/${train_set}_30k_nodup_hires 512 exp/nnet3${nnet3_affix}/tri3a exp/nnet3${nnet3_affix}/diag_ubm + fi -if [ $stage -le 6 ]; then - # iVector extractors can be sensitive to the amount of data, but this one has a - # fairly small dim (defaults to 100) so we don't use all of it, we use just the - # 100k subset (just under half the data). - steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ - data/${lda_train_set}_hires exp/nnet3${nnet3_affix}/diag_ubm exp/nnet3${nnet3_affix}/extractor || exit 1; + if [ $stage -le 6 ]; then + # iVector extractors can be sensitive to the amount of data, but this one has a + # fairly small dim (defaults to 100) so we don't use all of it, we use just the + # 100k subset (just under half the data). 
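+    # Note: this extractor is what the new --extractor option lets other runs
+    # reuse; when --extractor is supplied, the subset creation, LDA+MLLT and
+    # diag-UBM stages above, as well as this extractor training, are all
+    # skipped.  A hypothetical invocation would be:
+    #   local/nnet3/run_ivector_common.sh --extractor exp/nnet3/extractor ...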
+ steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${lda_train_set}_hires exp/nnet3${nnet3_affix}/diag_ubm exp/nnet3${nnet3_affix}/extractor || exit 1; + fi + extractor=exp/nnet3${nnet3_affix}/extractor fi if [ $stage -le 7 ]; then @@ -136,11 +142,11 @@ if [ $stage -le 7 ]; then steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ - data/${train_set}_max2_hires exp/nnet3${nnet3_affix}/extractor exp/nnet3${nnet3_affix}/ivectors_${train_set}_hires || exit 1; + data/${train_set}_max2_hires $extractor `basename $extractor`/ivectors_${train_set}_hires || exit 1; for dataset in test dev; do steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ - data/${dataset}_hires exp/nnet3${nnet3_affix}/extractor exp/nnet3${nnet3_affix}/ivectors_${dataset}_hires || exit 1; + data/${dataset}_hires $extractor `basename $extractor`/ivectors_${dataset}_hires || exit 1; done fi diff --git a/egs/fisher_english/s5/local/nnet3/run_ivector_common_pca.sh b/egs/fisher_english/s5/local/nnet3/run_ivector_common_pca.sh new file mode 100755 index 00000000000..e159781e9a1 --- /dev/null +++ b/egs/fisher_english/s5/local/nnet3/run_ivector_common_pca.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +. ./cmd.sh +set -e +stage=1 +speed_perturb=true +train_set=train +ivector_train_set= # data set for training i-vector extractor. + # If not provided, train_set will be used. + +nnet3_affix= +exp=exp + +. ./path.sh +. ./utils/parse_options.sh + +# perturbed data preparation +if [ "$speed_perturb" == "true" ]; then + if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have + # to perturb the normal data to get the alignments. + # _sp stands for speed-perturbed + + for datadir in ${train_set} ${ivector_train_set}; do + utils/data/perturb_data_dir_speed_3way.sh data/${datadir} data/${datadir}_sp + utils/fix_data_dir.sh data/${datadir}_sp + + mfccdir=mfcc_perturbed + steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \ + data/${datadir}_sp exp/make_mfcc/${datadir}_sp $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_sp exp/make_mfcc/${datadir}_sp $mfccdir || exit 1; + utils/fix_data_dir.sh data/${datadir}_sp + done + fi + train_set=${train_set}_sp + if ! [ -z "$ivector_train_set" ]; then + ivector_train_set=${ivector_train_set}_sp + fi +fi + +if [ $stage -le 3 ]; then + mfccdir=mfcc_hires + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + date=$(date +'%m_%d_%H_%M') + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/fisher_english-$date/s5b/$mfccdir/storage $mfccdir/storage + fi + + for dataset in $ivector_train_set $train_set; do + utils/copy_data_dir.sh data/$dataset data/${dataset}_hires + utils/data/perturb_data_dir_volume.sh data/${dataset}_hires + + steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir; + steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir; + + # Remove the small number of utterances that couldn't be extracted for some + # reason (e.g. too short; no such file). 
+ utils/fix_data_dir.sh data/${dataset}_hires; + done + + for dataset in test dev; do + # Create MFCCs for the eval set + utils/copy_data_dir.sh data/$dataset data/${dataset}_hires + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \ + data/${dataset}_hires exp/make_hires/$dataset $mfccdir; + steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/$dataset $mfccdir; + utils/fix_data_dir.sh data/${dataset}_hires # remove segments with problems + done +fi + +if [ -z "$ivector_train_set" ]; then + ivector_train_set=$train_set +fi + +# ivector extractor training +if [ $stage -le 4 ]; then + steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" \ + --max-utts 10000 --subsample 2 \ + data/${ivector_train_set}_hires \ + $exp/nnet3${nnet3_affix}/pca_transform +fi + +if [ $stage -le 5 ]; then + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ + data/${ivector_train_set}_hires 512 \ + $exp/nnet3${nnet3_affix}/pca_transform $exp/nnet3${nnet3_affix}/diag_ubm +fi + +if [ $stage -le 6 ]; then + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${ivector_train_set}_hires $exp/nnet3${nnet3_affix}/diag_ubm $exp/nnet3${nnet3_affix}/extractor || exit 1; +fi + +if [ $stage -le 7 ]; then + # We extract iVectors on all the ${train_set} data, which will be what we + # train the system on. + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${ivector_train_set}_hires data/${ivector_train_set}_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/${ivector_train_set}_max2_hires $exp/nnet3${nnet3_affix}/extractor $exp/nnet3${nnet3_affix}/ivectors_${ivector_train_set}_hires || exit 1; +fi + +if [ $stage -le 8 ]; then + for dataset in test dev; do + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/${dataset}_hires $exp/nnet3${nnet3_affix}/extractor $exp/nnet3${nnet3_affix}/ivectors_${dataset}_hires || exit 1; + done +fi + +exit 0; + diff --git a/egs/fisher_english/s5/local/nnet3/run_tdnn.sh b/egs/fisher_english/s5/local/nnet3/run_tdnn.sh new file mode 100644 index 00000000000..f055b853b61 --- /dev/null +++ b/egs/fisher_english/s5/local/nnet3/run_tdnn.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +# This script is not tested. + +# this is the standard "tdnn" system, built in nnet3; it's what we used to +# call multi-splice. + +. ./cmd.sh + + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +affix= +train_stage=-10 +common_egs_dir= +reporting_email= +remove_egs=true + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
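+  # ('chain' models are evaluated at one third of the usual frame rate, hence
+  # the --frame-subsampling-factor 3 passed when building the tree.)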
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 data/${train_set} $lang $gmm_dir $treedir || exit 1 +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn5 dim=$hidden_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn5 dim=$hidden_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
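+
+  # A rough guide to the main options below: --chain.xent-regularize is the
+  # weight of the auxiliary cross-entropy output; --chain.leaky-hmm-coefficient
+  # adds a small 'leaky' transition probability that keeps the chain objective
+  # well behaved; --trainer.frames-per-iter is approximately how many frames
+  # are processed per training iteration; and the number of parallel jobs
+  # grows from 3 to 16 while the effective learning rate decays from 0.001 to
+  # 0.0001.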
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$common_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $treedir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +decode_suff= +if [ $stage -le 15 ]; then + iter_opts= + if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; + ) & + done +fi +wait; +exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_a.sh new file mode 100644 index 00000000000..60f64dee299 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_a.sh @@ -0,0 +1,359 @@ +#!/bin/bash + +# This script is same as _f, but fixes the bug about acwt for best path. 
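+#
+# In outline, this semi-supervised recipe: (i) starts from an existing
+# supervised chain system trained on the train_sup11k subset, (ii) uses its
+# best-path decode of the train_unsup250k subset to build a tree and a
+# denominator FST over both data sources, (iii) generates egs separately from
+# the supervised lattices and the unsupervised decode lattices and combines
+# them with the weights in supervision_weights, and (iv) trains a new chain
+# model on the combined egs.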
+ +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=semisup11k_250k # for reference +exp=exp/semisup_11k + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup11k +semi_affix=semi11k_250k # affix relating train-set splitting proportion + +tdnn_affix=7b # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1a # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.3 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +extra_left_context=0 +extra_right_context=0 + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} +tree_affix=${tree_affix}_${semi_affix} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor + + steps/nnet3/chain/build_tree_multiple_sources.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --use-fmllr false \ + --cmd "$train_cmd" 10000 data/lang_chain \ + data/${supervised_set} $sup_ali_dir \ + data/${unsupervised_set} \ + $chaindir/best_path_${unsupervised_set}${decode_affix} \ + $treedir +fi + +dir=$exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights --cmd "$train_cmd" \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
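+  # Note: the duplicated outputs 'output-0' / 'output-1' (and the matching
+  # '-xent' outputs below) correspond to the two egs sources -- supervised and
+  # unsupervised -- treated as separate "languages" by the multilingual
+  # egs-combining script; the perl edit of final.config further down ties the
+  # output-1 affine components to the output-0 ones so they share parameters.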
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +. $dir/configs/vars + +left_context=$[model_left_context + extra_left_context] +right_context=$[model_right_context + extra_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` +right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` +left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` +right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` + +supervised_set=${supervised_set}_sp +sup_lat_dir=$exp/chain${nnet3_affix}/tri3_${supervised_set}_lats +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsupervised_set=${unsupervised_set}_sp +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg + +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
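+
+    # For the unsupervised data the supervision comes from the decode lattices
+    # rather than from alignments: --lattice-lm-scale keeps a scaled version of
+    # the lattice LM scores in the supervision, --lattice-prune-beam prunes the
+    # lattices first, the left/right tolerances control how far the labels may
+    # move in time, and --deriv-weights-scp supplies per-frame derivative
+    # weights obtained from the best-path decode.  --alignment-subsampling-factor
+    # is 1 because these lattices are already at the model's subsampled frame
+    # rate.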
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 14 ]; then + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --frames-per-iter 1500000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_b.sh new file mode 100644 index 00000000000..f106549167f --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_b.sh @@ -0,0 +1,358 @@ +#!/bin/bash + +# This script is same as _f, but fixes the bug about acwt for best path. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=semisup11k_250k # for reference +exp=exp/semisup_11k + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup11k +semi_affix=semi11k_250k # affix relating train-set splitting proportion + +tdnn_affix=7b # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1b # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.3 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +extra_left_context=0 +extra_right_context=0 + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} +tree_affix=${tree_affix}_${semi_affix} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor + + steps/nnet3/chain/build_tree_multiple_sources.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --use-fmllr false \ + --cmd "$train_cmd" 10000 data/lang_chain \ + data/${supervised_set} $sup_ali_dir \ + data/${unsupervised_set} \ + $chaindir/best_path_${unsupervised_set}${decode_affix} \ + $treedir +fi + +dir=$exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights --cmd "$train_cmd" \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +. $dir/configs/vars + +left_context=$[model_left_context + extra_left_context] +right_context=$[model_right_context + extra_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` +right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` +left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` +right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` + +supervised_set=${supervised_set}_sp +sup_lat_dir=$exp/chain${nnet3_affix}/tri3_${supervised_set}_lats +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsupervised_set=${unsupervised_set}_sp +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg + +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
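+
+    # Note: unlike the _a configuration, this variant does not pass
+    # --deriv-weights-scp to get_egs.sh, so the unsupervised egs are not
+    # weighted by per-frame derivative weights from the best path.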
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 14 ]; then + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --frames-per-iter 1500000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_c.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_c.sh new file mode 100644 index 00000000000..60f64dee299 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_c.sh @@ -0,0 +1,359 @@ +#!/bin/bash + +# This script is same as _f, but fixes the bug about acwt for best path. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=semisup11k_250k # for reference +exp=exp/semisup_11k + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup11k +semi_affix=semi11k_250k # affix relating train-set splitting proportion + +tdnn_affix=7b # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1a # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.3 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix= + +extra_left_context=0 +extra_right_context=0 + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} +tree_affix=${tree_affix}_${semi_affix} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor + + steps/nnet3/chain/build_tree_multiple_sources.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --use-fmllr false \ + --cmd "$train_cmd" 10000 data/lang_chain \ + data/${supervised_set} $sup_ali_dir \ + data/${unsupervised_set} \ + $chaindir/best_path_${unsupervised_set}${decode_affix} \ + $treedir +fi + +dir=$exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights --cmd "$train_cmd" \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +. $dir/configs/vars + +left_context=$[model_left_context + extra_left_context] +right_context=$[model_right_context + extra_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` +right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` +left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` +right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` + +supervised_set=${supervised_set}_sp +sup_lat_dir=$exp/chain${nnet3_affix}/tri3_${supervised_set}_lats +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsupervised_set=${unsupervised_set}_sp +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg + +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 14 ]; then + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --frames-per-iter 1500000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_d.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_d.sh new file mode 100644 index 00000000000..780c783c87f --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_d.sh @@ -0,0 +1,374 @@ +#!/bin/bash + +# This script is same as _f, but fixes the bug about acwt for best path. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=semisup11k_250k # for reference +exp=exp/semisup_11k + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup11k +semi_affix=semi11k_250k # affix relating train-set splitting proportion + +tdnn_affix=7b # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1d # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,0.3 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix=fg + +extra_left_context=0 +extra_right_context=0 + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} +tree_affix=${tree_affix}_${semi_affix} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor + + steps/nnet3/chain/build_tree_multiple_sources.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --use-fmllr false \ + --cmd "$train_cmd" 10000 data/lang_chain \ + data/${supervised_set} $sup_ali_dir \ + data/${unsupervised_set} \ + $chaindir/best_path_${unsupervised_set}${decode_affix} \ + $treedir +fi + +dir=$exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights --cmd "$train_cmd" \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +. $dir/configs/vars + +left_context=$[model_left_context + extra_left_context] +right_context=$[model_right_context + extra_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` +right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` +left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` +right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` + +supervised_set=${supervised_set}_sp +sup_lat_dir=$exp/chain${nnet3_affix}/tri3_${supervised_set}_lats +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsupervised_set=${unsupervised_set}_sp +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg + +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 14 ]; then + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --frames-per-iter 1500000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_e.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_e.sh new file mode 100644 index 00000000000..9f2a2a8993b --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_e.sh @@ -0,0 +1,374 @@ +#!/bin/bash + +# This script is same as _f, but fixes the bug about acwt for best path. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=semisup11k_250k # for reference +exp=exp/semisup_11k + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup11k +semi_affix=semi11k_250k # affix relating train-set splitting proportion + +tdnn_affix=7b # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1e # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,1.0 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix=fg + +extra_left_context=0 +extra_right_context=0 + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} +tree_affix=${tree_affix}_${semi_affix} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor + + steps/nnet3/chain/build_tree_multiple_sources.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --use-fmllr false \ + --cmd "$train_cmd" 10000 data/lang_chain \ + data/${supervised_set} $sup_ali_dir \ + data/${unsupervised_set} \ + $chaindir/best_path_${unsupervised_set}${decode_affix} \ + $treedir +fi + +dir=$exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights --cmd "$train_cmd" \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
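+  # (A note on the chain output names above: output-0 and output-1 will be fed
+  # the supervised and unsupervised egs respectively, since the combined egs get
+  # per-"language" output names from combine_egs.sh below; the edit to
+  # final.config further down makes output-1 reuse output-0's affine parameters,
+  # and at decode time output-0 is renamed back to 'output'.)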
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +. $dir/configs/vars + +left_context=$[model_left_context + extra_left_context] +right_context=$[model_right_context + extra_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` +right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` +left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` +right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` + +supervised_set=${supervised_set}_sp +sup_lat_dir=$exp/chain${nnet3_affix}/tri3_${supervised_set}_lats +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsupervised_set=${unsupervised_set}_sp +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg + +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
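+    # Get egs for the unsupervised data.  The lattices come from decoding with
+    # the chain system itself (already at the subsampled frame rate), so
+    # --alignment-subsampling-factor is 1 here rather than 3.
+    # --lattice-lm-scale and --lattice-prune-beam control how the lattice scores
+    # are carried into the numerator supervision, and --deriv-weights-scp gives
+    # per-frame weights computed from the best path of those lattices.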
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 14 ]; then + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --frames-per-iter 1500000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_f.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_f.sh new file mode 100644 index 00000000000..346c5e6eede --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_f.sh @@ -0,0 +1,374 @@ +#!/bin/bash + +# This script is same as _e, but is run for 3 epochs instead of 4. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=semisup11k_250k # for reference +exp=exp/semisup_11k + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup11k +semi_affix=semi11k_250k # affix relating train-set splitting proportion + +tdnn_affix=7b # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg= # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1f # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,1.0 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix=fg + +extra_left_context=0 +extra_right_context=0 + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} +tree_affix=${tree_affix}_${semi_affix} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor + + steps/nnet3/chain/build_tree_multiple_sources.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --use-fmllr false \ + --cmd "$train_cmd" 10000 data/lang_chain \ + data/${supervised_set} $sup_ali_dir \ + data/${unsupervised_set} \ + $chaindir/best_path_${unsupervised_set}${decode_affix} \ + $treedir +fi + +dir=$exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights --cmd "$train_cmd" \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
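+  # Note: output-0-xent below does not specify input=..., so it takes the
+  # immediately preceding layer (prefinal-xent) as its input, just as
+  # output-1-xent does explicitly.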
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +. $dir/configs/vars + +left_context=$[model_left_context + extra_left_context] +right_context=$[model_right_context + extra_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` +right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` +left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` +right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` + +supervised_set=${supervised_set}_sp +sup_lat_dir=$exp/chain${nnet3_affix}/tri3_${supervised_set}_lats +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsupervised_set=${unsupervised_set}_sp +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg + +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
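+    # (Optional sanity check, not run by this script: once the egs below have
+    # been dumped, a few of them can be printed in text form with, e.g.,
+    #   nnet3-chain-copy-egs ark:$unsup_egs_dir/cegs.1.ark ark,t:- | head
+    # assuming the archives are named cegs.*.ark as for the supervised egs.)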
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 14 ]; then + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --frames-per-iter 1500000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 3 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_g.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_g.sh new file mode 100644 index 00000000000..ccca9c6d334 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_11k_semisupervised_conf_g.sh @@ -0,0 +1,374 @@ +#!/bin/bash + +# This script is same as _e, but is run for 3 epochs instead of 4. + +set -u -e -o pipefail + +stage=-2 +train_stage=-100 +nj=40 +decode_nj=40 +base_train_set=semisup11k_250k # for reference +exp=exp/semisup_11k + +unsupervised_set=train_unsup250k # set this to your choice of unsupervised data +supervised_set=train_sup11k +semi_affix=semi11k_250k # affix relating train-set splitting proportion + +tdnn_affix=7b # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# Unsupervised options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +unsup_frames_per_eg=300 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly +lattice_lm_scale=0.0 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam=2.0 # If supplied will prune the lattices prior to getting egs for unsupervised data +tolerance=2 +graph_affix=_ex250k # can be used to decode the unsup data with another lm/graph +phone_insertion_penalty= + +# Semi-supervised options +comb_affix=comb1g # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +supervision_weights=1.0,1.0 +lm_weights=5,2 +sup_egs_dir= +unsup_egs_dir= +tree_affix=fg + +extra_left_context=0 +extra_right_context=0 + +xent_regularize=0.1 +hidden_dim=725 +minibatch_size=128 +# to tune: +# frames_per_eg for unsupervised + +decode_iter= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +nnet3_affix=_${semi_affix} # affix for nnet3 and chain dirs +decode_affix=${decode_affix}${graph_affix} +egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} +tree_affix=${tree_affix}_${semi_affix} + +RANDOM=0 + +if ! 
cuda-compiled; then + cat < $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor + + steps/nnet3/chain/build_tree_multiple_sources.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --use-fmllr false \ + --cmd "$train_cmd" 10000 data/lang_chain \ + data/${supervised_set} $sup_ali_dir \ + data/${unsupervised_set} \ + $chaindir/best_path_${unsupervised_set}${decode_affix} \ + $treedir +fi + +dir=$exp/chain${nnet3_affix}/tdnn${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} + +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_den_fst.sh --weights $lm_weights --cmd "$train_cmd" \ + ${treedir} ${chaindir}/best_path_${unsupervised_set}${decode_affix} \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output-1 input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + output-layer name=output input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-0-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + output-layer name=output-1-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + cp $dir/configs/final.config{,.orig} + + cat $dir/configs/final.config.orig | \ + perl -pe 's/component=output-1.affine/component=output-0.affine/g; + s/component=output-1-xent.affine/component=output-0-xent.affine/g;' > \ + $dir/configs/final.config +fi + +. $dir/configs/vars + +left_context=$[model_left_context + extra_left_context] +right_context=$[model_right_context + extra_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` +right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` +left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` +right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` + +supervised_set=${supervised_set}_sp +sup_lat_dir=$exp/chain${nnet3_affix}/tri3_${supervised_set}_lats +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set} + frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 3 \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${supervised_set}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsupervised_set=${unsupervised_set}_sp +unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=$frames_per_eg + +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
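+    # In this variant unsup_frames_per_eg is set to 300 at the top of the
+    # script, so the unsupervised examples are cut into 300-frame chunks rather
+    # than inheriting frames_per_eg from the supervised egs.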
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --phone-insertion-penalty "$phone_insertion_penalty" \ + --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --generate-egs-scp true \ + data/${unsupervised_set}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi + +if [ $stage -le 14 ]; then + steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --minibatch-size 128 --frames-per-iter 1500000 \ + --lang2weight $supervision_weights --egs-prefix cegs. 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + + +if [ $train_stage -le -4 ]; then + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${base_train_set}_sp_hires \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch "150=128/300=64" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 3 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set}_hires \ + --tree-dir $treedir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + iter_opts= + if [ ! 
-z $decode_iter ]; then + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 + iter_opts=" --iter ${decode_iter}-output " + else + nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ + nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 + iter_opts=" --iter final-output " + fi + + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; + ) & + done +fi +wait; +exit 0; + diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_oracle.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_oracle.sh new file mode 100755 index 00000000000..aa0e433c526 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_oracle.sh @@ -0,0 +1,198 @@ +#!/bin/bash +set -e + +# Based on run_tdnn_7b.sh in the fisher swbd recipe + +# configs for 'chain' +stage=0 +tdnn_affix=7b_oracle +train_stage=-10 +get_egs_stage=-10 +decode_iter= +train_set=train_sup11k +ivector_train_set=semisup11k_250k +tree_affix= +nnet3_affix=_semi11k_250k +chain_affix=_semi11k_250k +exp=exp/semisup_11k +gmm=tri3 +xent_regularize=0.1 +hidden_dim=725 + +# training options +num_epochs=4 +remove_egs=false +common_egs_dir= +minibatch_size=128 + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +ali_dir=${gmm_dir}_ali_${train_set} +if [ $stage -le 11 ]; then + steps/align_fmllr.sh --cmd "$train_cmd" --nj 40 \ + data/${train_set} data/lang $gmm_dir $ali_dir || exit 1 + + # Build a tree using our new topology. 
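+  # The tree is built with --frame-subsampling-factor 3 so that it matches the
+  # reduced output frame rate of the chain model.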
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 11000 data/${train_set} $lang $ali_dir $treedir || exit 1 +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=$hidden_dim + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=$hidden_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
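+  # This "oracle" system is trained on $train_set using its true transcripts
+  # (e.g. the combined supervised+unsupervised set when invoked from
+  # local/semisup/run_15k.sh), so it serves as a rough upper-bound reference for
+  # the semi-supervised recipes; --chain.apply-deriv-weights is false because
+  # all frames of supervision are trusted equally.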
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$common_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $treedir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + +graph_dir=$dir/graph +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir +fi + +decode_suff= +if [ $stage -le 15 ]; then + iter_opts= + if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; + ) & + done +fi +wait; +exit 0; diff --git a/egs/fisher_english/s5/local/semisup/run_10k.sh b/egs/fisher_english/s5/local/semisup/run_10k.sh new file mode 100644 index 00000000000..a5a293f3ce2 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/run_10k.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0 + +. cmd.sh +. path.sh + +stage=-1 +train_stage=-10 + +. 
utils/parse_options.sh + +set -o pipefail +exp=exp/semisup_11k +false && { +utils/subset_data_dir.sh --speakers data/train_sup 11000 data/train_sup11k || exit 1 +utils/subset_data_dir.sh --shortest data/train_sup11k 5000 data/train_sup11k_short || exit 1 +utils/subset_data_dir.sh data/train_sup11k 5500 data/train_sup11k_half || exit 1 + +steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ + data/train_sup11k_short data/lang $exp/mono0a || exit 1 + +steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_sup11k_half data/lang $exp/mono0a $exp/mono0a_ali || exit 1 + +steps/train_deltas.sh --cmd "$train_cmd" \ + 2000 10000 data/train_sup11k_half data/lang $exp/mono0a_ali $exp/tri1 || exit 1 + +(utils/mkgraph.sh data/lang_test $exp/tri1 $exp/tri1/graph + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + $exp/tri1/graph data/dev $exp/tri1/decode_dev)& + +steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_sup11k data/lang $exp/tri1 $exp/tri1_ali || exit 1; + +steps/train_lda_mllt.sh --cmd "$train_cmd" \ + 2500 15000 data/train_sup11k data/lang $exp/tri1_ali $exp/tri2 || exit 1; + +(utils/mkgraph.sh data/lang_test $exp/tri2 $exp/tri2/graph + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + $exp/tri2/graph data/dev $exp/tri2/decode_dev)& + +steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_sup11k data/lang $exp/tri2 $exp/tri2_ali || exit 1; + +steps/train_sat.sh --cmd "$train_cmd" \ + 2500 15000 data/train_sup11k data/lang $exp/tri2_ali $exp/tri3 || exit 1; + +( + utils/mkgraph.sh data/lang_test $exp/tri3 $exp/tri3/graph + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + $exp/tri3/graph data/dev $exp/tri3/decode_dev +)& + +utils/combine_data.sh data/semisup11k_250k data/train_sup11k data/train_unsup250k || exit 1 +} + +local/semisup/chain/tuning/run_tdnn_11k.sh \ + --ivector-train-set semisup11k_250k --train-set train_sup11k --stage $stage --train-stage $train_stage || exit 1 diff --git a/egs/fisher_english/s5/local/semisup/run_15k.sh b/egs/fisher_english/s5/local/semisup/run_15k.sh new file mode 100644 index 00000000000..7d5a2589a21 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/run_15k.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0 + +. cmd.sh +. path.sh + +stage=-1 +train_stage=-10 + +. 
utils/parse_options.sh + +set -o pipefail +exp=exp/semisup_15k + +false && { +utils/subset_data_dir.sh --speakers data/train_sup 15000 data/train_sup15k || exit 1 +utils/subset_data_dir.sh --shortest data/train_sup15k 5000 data/train_sup15k_short || exit 1 +utils/subset_data_dir.sh data/train_sup15k 7500 data/train_sup15k_half || exit 1 + +steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ + data/train_sup15k_short data/lang $exp/mono0a || exit 1 + +steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_sup15k_half data/lang $exp/mono0a $exp/mono0a_ali || exit 1 + +steps/train_deltas.sh --cmd "$train_cmd" \ + 2000 10000 data/train_sup15k_half data/lang $exp/mono0a_ali $exp/tri1 || exit 1 + +(utils/mkgraph.sh data/lang_test $exp/tri1 $exp/tri1/graph + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + $exp/tri1/graph data/dev $exp/tri1/decode_dev)& + +steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_sup15k data/lang $exp/tri1 $exp/tri1_ali || exit 1; + +steps/train_lda_mllt.sh --cmd "$train_cmd" \ + 2500 15000 data/train_sup15k data/lang $exp/tri1_ali $exp/tri2 || exit 1; + +(utils/mkgraph.sh data/lang_test $exp/tri2 $exp/tri2/graph + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + $exp/tri2/graph data/dev $exp/tri2/decode_dev)& + +steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_sup15k data/lang $exp/tri2 $exp/tri2_ali || exit 1; + +steps/train_sat.sh --cmd "$train_cmd" \ + 2500 15000 data/train_sup15k data/lang $exp/tri2_ali $exp/tri3 || exit 1; + +( + utils/mkgraph.sh data/lang_test $exp/tri3 $exp/tri3/graph + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + $exp/tri3/graph data/dev $exp/tri3/decode_dev +)& + +utils/combine_data.sh data/semisup15k_250k data/train_sup15k data/train_unsup250k || exit 1 + +local/semisup/chain/tuning/run_tdnn_11k.sh \ + --train-set train_sup15k \ + --nnet3-affix _semi15k_250k \ + --chain-affix _semi15k_250k \ + --stage $stage --train-stage $train_stage \ + --exp $exp \ + --ivector-train-set semisup15k_250k || exit 1 +} + +local/semisup/chain/tuning/run_tdnn_oracle.sh \ + --train-set semisup15k_250k \ + --nnet3-affix _semi15k_250k \ + --chain-affix _semi15k_250k_oracle \ + --stage 9 --train-stage $train_stage \ + --exp $exp \ + --ivector-train-set semisup15k_250k || exit 1 diff --git a/egs/tedlium/s5_r2/local/chain/run_semisupervised.sh b/egs/tedlium/s5_r2/local/chain/run_semisupervised.sh new file mode 100755 index 00000000000..674b8745c42 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/run_semisupervised.sh @@ -0,0 +1,181 @@ +#!/bin/bash + +set -e -o pipefail + +# e.g. try lm-scale: +# local/chain/run_semisupervised.sh --stage 1 --tdnn-affix _sup1a --egs-affix _lmwt1.0 --lattice-lm-scale 1.0 + + +# frames_per_eg 300 +# local/chain/run_semisupervised.sh --stage 1 --tdnn-affix _sup1d --unsup-frames-per-eg 300 --egs-affix _fpe300 + +stage=0 +nj=30 +decode_nj=30 +base_train_set=train_cleaned # the starting point train-set +base_gmm=tri3_cleaned # the starting point of training on the supervised data (no flat start for now) +semi_affix= # affix relating train-set splitting proportion + # (currently supervised 25%) and the base train set (currently _cleaned), etc. 
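+                 # With semi_affix empty, the supervised/unsupervised subsets are
+                 # simply named ${base_train_set}_sup and ${base_train_set}_unsup
+                 # (see the assignments after parse_options below).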
+tdnn_affix=_sup1a # affix for the supervised chain-model directory +train_supervised_opts="--stage -10 --train-stage -10" + +# combination options +decode_affix= +egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir +comb_affix=_comb1a # affix for new chain-model directory trained on the combined supervised+unsupervised subsets +unsup_frames_per_eg= # if empty will be equal to the supervised model's config +unsup_egs_weight=1.0 +lattice_lm_scale=0.1 # lm-scale for using the weights from unsupervised lattices +lattice_prune_beam= # If supplied will prune the lattices prior to getting egs for unsupervised data +left_tolerance=2 +right_tolerance=2 +train_combined_opts="--num-epochs 5" + +# to tune: +# frames_per_eg for unsupervised + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +supervised_set=${base_train_set}_sup${semi_affix} +unsupervised_set=${base_train_set}_unsup${semi_affix} +gmm=${base_gmm}_semi${semi_affix} # the gmm to be supplied to chain/run_tdnn.sh +nnet3_affix=_cleaned_semi${semi_affix} # affix for nnet3 and chain dirs + +if ! cuda-compiled; then + cat < data/$supervised_set/supervised_uttlist || true + utils/shuffle_list.pl data/$base_train_set/feats.scp | cut -d' ' -f1 | \ + tail -$num_unsupervised_utts > data/$supervised_set/unsupervised_uttlist || true + utils/subset_data_dir.sh --utt-list data/$supervised_set/supervised_uttlist \ + data/$base_train_set data/$supervised_set || exit 1 + utils/subset_data_dir.sh --utt-list data/$supervised_set/unsupervised_uttlist \ + data/$base_train_set data/$unsupervised_set || exit 1 + utils/data/subset_data_dir.sh --utt-list data/$unsupervised_set/feats.scp \ + data/${base_train_set}_sp_hires data/${unsupervised_set}_hires +fi + +if [ $stage -le -3 ]; then + # align the supervised subset with the current cleaned gmm + if [ -f $gmm/ali.1.gz ]; then + echo "$0: alignments in $gmm appear to already exist. Please either remove them " + echo " ... or use a later --stage option." 
+ exit 1 + fi + echo "$0: aligning the supervised data data/${supervised_set}" + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + data/${supervised_set} data/lang exp/$base_gmm exp/$gmm +fi + +if [ $stage -le -2 ]; then + echo "$0: chain training on the supervised subset data/${supervised_set}" + local/chain/run_tdnn.sh $train_supervised_opts --remove-egs false \ + --train-set $supervised_set --gmm $gmm \ + --nnet3-affix $nnet3_affix --tdnn-affix $tdnn_affix +fi + +if [ $stage -le -1 ]; then + echo "$0: getting ivectors for the hires unsupervised data data/${unsupervised_set}_hires" + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj "$nj" \ + data/${unsupervised_set}_hires exp/nnet3${nnet3_affix}/extractor \ + exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires +fi + +chaindir=exp/chain${nnet3_affix}/tdnn${tdnn_affix}_sp_bi + +left_context=`cat $chaindir/egs/info/left_context` +right_context=`cat $chaindir/egs/info/right_context` +left_context_initial=`cat $chaindir/egs/info/left_context_initial` +right_context_final=`cat $chaindir/egs/info/right_context_final` +[ -z $unsup_frames_per_eg ] && unsup_frames_per_eg=`cat $chaindir/egs/info/frames_per_eg` +frame_subsampling_factor=`cat $chaindir/frame_subsampling_factor` +cmvn_opts=`cat $chaindir/cmvn_opts` + +if [ $stage -le 0 ]; then + echo "$0: getting the decoding lattices for the unsupervised subset using the chain model at: $chaindir" + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $chaindir/graph data/${unsupervised_set}_hires $chaindir/decode_${unsupervised_set}${decode_affix} + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${unsupervised_set}_hires \ + ${chaindir}/decode_${unsupervised_set}${decode_affix} ${chaindir}/decode_${unsupervised_set}${decode_affix}_rescore + ln -s ../final.mdl $chaindir/decode_${unsupervised_set}${decode_affix}_rescore/final.mdl || true +fi + +if [ $stage -le 1 ]; then + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $left_tolerance --right-tolerance $right_tolerance \ + --left-context $left_context --right-context $right_context \ + --left-context-initial $left_context_initial --right-context-final $right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --egs-weight $unsup_egs_weight \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ + data/${unsupervised_set}_hires $chaindir \ + ${chaindir}/decode_${unsupervised_set}${decode_affix}_rescore $chaindir/unsup_egs${decode_affix}${egs_affix} +fi + +sup_egs_dir=$chaindir/egs +unsup_egs_dir=$chaindir/unsup_egs${decode_affix}${egs_affix} +comb_egs_dir=$chaindir/comb_egs${decode_affix}${egs_affix} +if [ $stage -le 2 ]; then + echo "$0: combining supervised/unsupervised egs" + num_archives=`cat $chaindir/egs/info/num_archives` + mkdir -p $comb_egs_dir/log + cp {$sup_egs_dir,$comb_egs_dir}/train_diagnostic.cegs + cp {$sup_egs_dir,$comb_egs_dir}/valid_diagnostic.cegs + cp {$sup_egs_dir,$comb_egs_dir}/combine.cegs + cp {$sup_egs_dir,$comb_egs_dir}/cmvn_opts + cp -r 
$sup_egs_dir/info $comb_egs_dir + cat {$sup_egs_dir,$unsup_egs_dir}/info/num_frames | awk '{s+=$1} END{print s}' > $comb_egs_dir/info/num_frames + cat {$sup_egs_dir,$unsup_egs_dir}/info/egs_per_archive | awk '{s+=$1} END{print s}' > $comb_egs_dir/info/egs_per_archive + out_egs_list= + egs_list= + for n in $(seq $num_archives); do + egs_list="$egs_list $sup_egs_dir/cegs.$n.ark" + egs_list="$egs_list $unsup_egs_dir/cegs.$n.ark" + out_egs_list="$out_egs_list ark:$comb_egs_dir/cegs.$n.ark" + done + srand=0 + $decode_cmd $comb_egs_dir/log/combine.log \ + nnet3-chain-copy-egs "ark:cat $egs_list|" $out_egs_list +fi + +if [ $stage -le 3 ]; then + echo "$0: training on the supervised+unsupervised subset" + # the train-set and gmm do not matter as we are providing the egs + local/chain/run_tdnn.sh --stage 17 --remove-egs false --train-set $supervised_set --gmm $gmm \ + --nnet3-affix $nnet3_affix \ + --tdnn-affix ${tdnn_affix}${decode_affix}${egs_affix}${comb_affix} \ + --common-egs-dir $comb_egs_dir $train_combined_opts +fi + diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh index 99921a9bf61..1c4a032fc57 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh @@ -52,6 +52,7 @@ train_set=train_cleaned gmm=tri3_cleaned # the gmm for the target data num_threads_ubm=32 nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +num_epochs=4 # The rest are configs specific to this script. Most of the parameters # are just hardcoded at this level, in the commands below. @@ -59,6 +60,7 @@ train_stage=-10 tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. tdnn_affix=1d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true # End configuration section. echo "$0 $@" # Print the command line for logging @@ -212,13 +214,13 @@ if [ $stage -le 18 ]; then --egs.chunk-width 150 \ --trainer.num-chunk-per-minibatch 128 \ --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs 4 \ + --trainer.num-epochs $num_epochs \ --trainer.optimization.num-jobs-initial 2 \ --trainer.optimization.num-jobs-final 12 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ + --cleanup.remove-egs $remove_egs \ --feat-dir $train_data_dir \ --tree-dir $tree_dir \ --lat-dir $lat_dir \ diff --git a/egs/wsj/s5/steps/best_path_weights.sh b/egs/wsj/s5/steps/best_path_weights.sh new file mode 100755 index 00000000000..c2e0c60f961 --- /dev/null +++ b/egs/wsj/s5/steps/best_path_weights.sh @@ -0,0 +1,160 @@ +#!/bin/bash + +# Copyright 2014-17 Vimal Manohar + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. 
+ + +# This script combines frame-level posteriors from different decode +# directories. The first decode directory is assumed to be the primary +# and is used to get the best path. The posteriors from other decode +# directories are interpolated with the posteriors of the best path. +# The output is a new directory with final.mdl, tree from the primary +# decode-dir and the best path alignments and weights in a decode-directory +# with the same basename as the primary directory. +# This is typically used to get better posteriors for semisupervised training +# of DNN +# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg +# exp/sgmm_mmi_b0.1/decode_fmllr_train_unt.seg_it4 exp/combine_dnn_sgmm +# Here the final.mdl and tree are copied from exp/tri6_nnet to +# exp/combine_dnn_sgmm. ali.*.gz obtained from the primary dir and +# the interpolated posteriors in weights.scp are placed in +# exp/combine_dnn_sgmm/decode_train_unt.seg + +set -e + +# begin configuration section. +cmd=run.pl +stage=-10 +acwt=0.1 +#end configuration section. + +help_message="Usage: "$(basename $0)" [options] [:weight] [:weight] [[:weight] ... ] + E.g. "$(basename $0)" data/train_unt.seg data/lang exp/tri1/decode:0.5 exp/tri2/decode:0.25 exp/tri3/decode:0.25 exp/combine +Options: + --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. +"; + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -lt 4 ]; then + printf "$help_message\n"; + exit 1; +fi + +data=$1 +lang=$2 +dir=${@: -1} # last argument to the script +shift 2; +decode_dirs=( $@ ) # read the remaining arguments into an array +unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir +num_sys=${#decode_dirs[@]} # number of systems to combine + +mkdir -p $dir +mkdir -p $dir/log + +decode_dir=`echo ${decode_dirs[0]} | cut -d: -f1` +nj=`cat $decode_dir/num_jobs` + +mkdir -p $dir + +if [ $stage -lt -1 ]; then + mkdir -p $dir/log + $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ + lattice-best-path --acoustic-scale=$acwt \ + "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz |" \ + ark:/dev/null "ark:| gzip -c > $dir/ali.JOB.gz" || exit 1 +fi + +src_dir=`dirname $decode_dir` + +cp $src_dir/cmvn_opts $dir/ || exit 1 +for f in final.mat splice_opts frame_subsampling_factor; do + [ -f $src_dir/$f ] && cp $src_dir/$f $dir +done + +weights_sum=0.0 + +for i in `seq 0 $[num_sys-1]`; do + decode_dir=${decode_dirs[$i]} + + weight=`echo $decode_dir | cut -d: -s -f2` + [ -z "$weight" ] && weight=1.0 + + if [ $i -eq 0 ]; then + file_list="\"ark:vector-scale --scale=$weight ark:$dir/weights.$i.JOB.ark ark:- |\"" + else + file_list="$file_list \"ark,s,cs:vector-scale --scale=$weight ark:$dir/weights.$i.JOB.ark ark:- |\"" + fi + + weights_sum=`perl -e "print STDOUT $weights_sum + $weight"` +done + +inv_weights_sum=`perl -e "print STDOUT 1.0/$weights_sum"` + +fdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}` + +for i in `seq 0 $[num_sys-1]`; do + if [ $stage -lt $i ]; then + decode_dir=`echo ${decode_dirs[$i]} | cut -d: -f1` + + model=`dirname $decode_dir`/final.mdl # model one level up from decode dir + tree=`dirname $decode_dir`/tree # tree one level up from decode dir + + for f in $model $decode_dir/lat.1.gz $tree; do + [ ! 
-f $f ] && echo "$0: expecting file $f to exist" && exit 1; + done + if [ $i -eq 0 ]; then + nj=`cat $decode_dir/num_jobs` || exit 1; + cp $model $dir || exit 1 + cp $tree $dir || exit 1 + echo $nj > $dir/num_jobs + else + if [ $nj != `cat $decode_dir/num_jobs` ]; then + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + exit 1; + fi + fi + + $cmd JOB=1:$nj $dir/log/get_post.$i.JOB.log \ + lattice-to-post --acoustic-scale=$acwt \ + "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz|" ark:- \| \ + post-to-pdf-post $model ark,s,cs:- ark:- \| \ + get-post-on-ali ark,s,cs:- "ark,s,cs:gunzip -c $dir/ali.JOB.gz | convert-ali $dir/final.mdl $model $tree ark,s,cs:- ark:- | ali-to-pdf $model ark,s,cs:- ark:- |" "ark,scp:$fdir/weights.$i.JOB.ark,$fdir/weights.$i.JOB.scp" || exit 1 + fi +done + +if [ $stage -lt $num_sys ]; then + if [ "$num_sys" -eq 1 ]; then + for n in `seq $nj`; do + cat $dir/weights.0.$n.scp + done > $dir/weights.scp + else + $cmd JOB=1:$nj $dir/log/interpolate_post.JOB.log \ + vector-sum $file_list ark:- \| \ + vector-scale --scale=$inv_weights_sum ark:- \ + ark,scp:$fdir/weights.JOB.ark,$fdir/weights.JOB.scp || exit 1 + + for n in `seq $nj`; do + cat $dir/weights.$n.scp + done > $dir/weights.scp + fi +fi + +for n in `seq 1 $[num_sys-1]`; do + rm $dir/weights.$n.*.ark $dir/weights.$n.*.scp +done + +exit 0 diff --git a/egs/wsj/s5/steps/conf/apply_calibration.sh b/egs/wsj/s5/steps/conf/apply_calibration.sh index c1a22e274b8..48f9e17d30b 100755 --- a/egs/wsj/s5/steps/conf/apply_calibration.sh +++ b/egs/wsj/s5/steps/conf/apply_calibration.sh @@ -28,6 +28,7 @@ caldir=$4 dir=$5 model=$latdir/../final.mdl # assume model one level up from decoding dir. +model_dir=$latdir/.. calibration=$caldir/calibration.mdl word_feats=$caldir/word_feats word_categories=$caldir/word_categories @@ -49,6 +50,12 @@ cp $calibration $dir/calibration.mdl cp $word_feats $dir/word_feats cp $word_categories $dir/word_categories +frame_shift_opt= +if [ -f $model_dir/frame_subsampling_factor ]; then + frame_subsampling_factor=$(cat $model_dir/frame_subsampling_factor) + frame_shift_opt="--frame-shift=0.0$frame_subsampling_factor" +fi + # Create the ctm with raw confidences, # - we keep the timing relative to the utterance, if [ $stage -le 0 ]; then @@ -58,7 +65,7 @@ if [ $stage -le 0 ]; then lattice-push --push-strings=false ark:- ark:- \| \ lattice-align-words-lexicon --max-expand=10.0 \ $lang/phones/align_lexicon.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr $frame_shift_opt ark:- - \| \ utils/int2sym.pl -f 5 $lang/words.txt \ '>' $dir/JOB.ctm # Merge and clean, @@ -76,7 +83,7 @@ fi # Create the forwarding data for logistic regression, if [ $stage -le 2 ]; then steps/conf/prepare_calibration_data.py --conf-feats $dir/forward_feats.ark \ - --lattice-depth $latdepth $dir/ctm_int $word_feats $word_categories + --lattice-depth $latdepth $frame_shift_opt $dir/ctm_int $word_feats $word_categories fi # Apply calibration model to dev, diff --git a/egs/wsj/s5/steps/conf/convert_ctm_to_weights.py b/egs/wsj/s5/steps/conf/convert_ctm_to_weights.py new file mode 100755 index 00000000000..02a616b2c03 --- /dev/null +++ b/egs/wsj/s5/steps/conf/convert_ctm_to_weights.py @@ -0,0 +1,101 @@ +#! 
/usr/bin/env python + +import argparse +import logging +import sys + +sys.path.insert(0, 'steps') +import libs.common as common_lib + +logger = logging.getLogger('libs') +logger.setLevel(logging.INFO) +handler = logging.StreamHandler() +handler.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - " + "%(funcName)s - %(levelname)s ] %(message)s") +handler.setFormatter(formatter) +logger.addHandler(handler) + + +def get_args(): + parser = argparse.ArgumentParser( + description="""This script converts CTM to per-frame weights by the word + posteriors in the CTM as the weights.""") + + parser.add_argument("--frame-shift", type=float, default=0.01, + help="Frame shift value in seconds") + parser.add_argument("--default-weight", type=float, default=1.0, + help="Default weight on silence frames") + parser.add_argument("segments_in", type=str, help="Input segments file") + parser.add_argument("ctm_in", type=str, help="Input utterance-level CTM " + "file i.e. the first column has utterance-ids") + parser.add_argument("weights_out", type=str, help="Output per-frame " + "weights vector written in Kaldi text archive format") + + args = parser.parse_args() + + return args + + +def run(args): + utt2num_frames = {} + with common_lib.smart_open(args.segments_in) as segments_reader: + for line in segments_reader.readlines(): + parts = line.strip().split() + if len(parts) not in [4, 5]: + raise RuntimeError("Invalid line {0} in segments file {1}" + "".format(line.strip(), args.segments_in)) + utt2num_frames[parts[0]] = int((float(parts[3]) - float(parts[2])) + / args.frame_shift + 0.5) + + num_utt = 0 + with common_lib.smart_open(args.ctm_in) as ctm_reader, \ + common_lib.smart_open(args.weights_out, 'w') as weights_writer: + prev_utt = None + weights = [] + for line in ctm_reader.readlines(): + parts = line.strip().split() + if len(parts) not in [5, 6]: + raise RuntimeError("Invalid line {0} in CTM file {1}" + "".format(line.strip(), args.ctm_in)) + + utt = parts[0] + if utt != prev_utt: + if prev_utt is not None: + assert len(weights) >= utt2num_frames[prev_utt] + common_lib.write_vector_ascii(weights_writer, weights, + key=prev_utt) + weights = [args.default_weight for x in + range(utt2num_frames[utt])] + + start_time = float(parts[2]) + dur = float(parts[3]) + prob = 1.0 if len(parts) == 5 else float(parts[5]) + + start_frame = int(start_time / args.frame_shift + 0.5) + length = int(dur / args.frame_shift) + + if len(weights) < start_frame + length: + weights.extend([args.default_weight for x in + range(len(weights), start_frame + length)]) + for x in range(start_frame, start_frame + length): + weights[x] = prob + + assert len(weights) >= start_frame + length + prev_utt = utt + num_utt += 1 + assert len(weights) >= utt2num_frames[prev_utt] + common_lib.write_vector_ascii(weights_writer, weights, + key=prev_utt) + + if num_utt == 0: + raise RuntimeError("Failed to process any utterances") + + +def main(): + args = get_args() + run(args) + + +if __name__ == "__main__": + main() diff --git a/egs/wsj/s5/steps/conf/prepare_calibration_data.py b/egs/wsj/s5/steps/conf/prepare_calibration_data.py index bc8f92a2f7f..753771b1d89 100755 --- a/egs/wsj/s5/steps/conf/prepare_calibration_data.py +++ b/egs/wsj/s5/steps/conf/prepare_calibration_data.py @@ -10,7 +10,7 @@ Prepare input features and training targets for logistic regression, which calibrates the Minimum Bayes Risk posterior confidences. 
-The logisitc-regression input features are: +The logisitc-regression input features are: - posteriors from 'ctm' transformed by logit, - logarithm of word-length in letters, - 10base logarithm of unigram probability of a word from language model, @@ -34,6 +34,8 @@ parser.add_option("--conf-targets", help="Targets file for logistic regression (no targets generated if '') [default %default]", default='') parser.add_option("--conf-feats", help="Feature file for logistic regression. [default %default]", default='') parser.add_option("--lattice-depth", help="Per-frame lattice depths, ascii-ark (optional). [default %default]", default='') +parser.add_option("--frame-shift", type=float, default=0.01, + help="Frame shift value in seconds [default %default]") (o, args) = parser.parse_args() if len(args) != 3: @@ -63,11 +65,11 @@ if o.conf_targets != '': with open(o.conf_targets,'w') as f: for (utt, chan, beg, dur, wrd_id, conf, score_tag) in ctm: - # Skip the words we don't know if being correct, - if score_tag == 'U': continue + # Skip the words we don't know if being correct, + if score_tag == 'U': continue # Some words are excluded from training (partial words, hesitations, etc.), # (Value: 1 == keep word, 0 == exclude word from the targets), - if not word_filter[wrd_id]: continue + if not word_filter[wrd_id]: continue # Build the key, key = "%s^%s^%s^%s^%s,%s,%s" % (utt, chan, beg, dur, wrd_id, conf, score_tag) # Build the target, @@ -102,7 +104,7 @@ # - log of word-length, log_word_length = math.log(word_length[wrd_id]) # i.e. number of phones in a word, # - categorical distribution of words (with frequency higher than min-count), - wrd_1_of_k = [0]*wrd_cat_num; + wrd_1_of_k = [0]*wrd_cat_num; wrd_1_of_k[wrd_to_cat[wrd_id]] = 1; # Compose the input feature vector, @@ -110,10 +112,10 @@ # Optionally add average-depth of lattice at the word position, if o.lattice_depth != '': - depth_slice = depths[utt][int(round(100.0*float(beg))):int(round(100.0*(float(beg)+float(dur))))] + depth_slice = depths[utt][int(float(beg) / o.frame_shift + 0.5):int((float(beg) + max(o.frame_shift, float(dur))) / o.frame_shift + 0.5)] log_avg_depth = math.log(float(sum(depth_slice))/len(depth_slice)) feats += [ log_avg_depth ] - # Store the input features, + # Store the input features, f.write(key + ' [ ' + ' '.join(map(str,feats)) + ' ]\n') diff --git a/egs/wsj/s5/steps/conf/train_calibration.sh b/egs/wsj/s5/steps/conf/train_calibration.sh index c2aca05056e..9a8451c9f85 100755 --- a/egs/wsj/s5/steps/conf/train_calibration.sh +++ b/egs/wsj/s5/steps/conf/train_calibration.sh @@ -12,7 +12,7 @@ # (- categorical distribution of 'lang/words.txt', DISABLED) # begin configuration section. -cmd= +cmd=run.pl lmwt=12 decode_mbr=true word_min_count=10 # Minimum word-count for single-word category, @@ -43,6 +43,7 @@ latdir=$4 dir=$5 model=$latdir/../final.mdl # assume model one level up from decoding dir. +model_dir=$latdir/.. for f in $data/text $lang/words.txt $word_feats $latdir/lat.1.gz; do [ ! 
-f $f ] && echo "$0: Missing file $f" && exit 1 @@ -57,6 +58,12 @@ echo $lmwt >$dir/lmwt echo $decode_mbr >$dir/decode_mbr cp $word_feats $dir/word_feats +frame_shift_opt= +if [ -f $model_dir/frame_subsampling_factor ]; then + frame_subsampling_factor=$(cat $model_dir/frame_subsampling_factor) + frame_shift_opt="--frame-shift=0.0$frame_subsampling_factor" +fi + # Create the ctm with raw confidences, # - we keep the timing relative to the utterance, if [ $stage -le 0 ]; then @@ -66,7 +73,7 @@ if [ $stage -le 0 ]; then lattice-push --push-strings=false ark:- ark:- \| \ lattice-align-words-lexicon --max-expand=10.0 \ $lang/phones/align_lexicon.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr $frame_shift_opt ark:- - \| \ utils/int2sym.pl -f 5 $lang/words.txt \ '>' $dir/JOB.ctm # Merge and clean, @@ -104,7 +111,7 @@ fi if [ $stage -le 3 ]; then steps/conf/prepare_calibration_data.py \ --conf-targets $dir/train_targets.ark --conf-feats $dir/train_feats.ark \ - --lattice-depth $latdepth $dir/ctm_aligned_int $word_feats $dir/word_categories + --lattice-depth $latdepth $frame_shift_opt $dir/ctm_aligned_int $word_feats $dir/word_categories fi # Train the logistic regression, diff --git a/egs/wsj/s5/steps/libs/common.py b/egs/wsj/s5/steps/libs/common.py index 8727ccd1a5e..d147cd7ba86 100644 --- a/egs/wsj/s5/steps/libs/common.py +++ b/egs/wsj/s5/steps/libs/common.py @@ -358,6 +358,33 @@ def write_matrix_ascii(file_or_fd, mat, key=None): if fd is not file_or_fd : fd.close() +def write_vector_ascii(file_or_fd, vec, key=None): + """This function writes the vector 'vec' stored as a list + in kaldi vector text format. + The destination can be a file or an opened file descriptor. + If key is provided, then vector is written to an archive with the 'key' + as the index field. + """ + try: + fd = open(file_or_fd, 'w') + except TypeError: + # 'file_or_fd' is opened file descriptor, + fd = file_or_fd + + try: + if key is not None: + print ("{0} [".format(key), + file=fd, end=' ') # ark-files have keys (utterance-id) + else: + print (" [", file=fd, end=' ') + + line = ' '.join(["{0:f}".format(x) for x in vec]) + line += " ]" + print (line, file=fd) + finally: + if fd is not file_or_fd : fd.close() + + def read_matrix_ascii(file_or_fd): """This function reads a matrix in kaldi matrix text format and stores it as a list of lists. diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index fedce12dda0..32b320c495e 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -129,7 +129,8 @@ def train_new_models(dir, iter, srand, num_jobs, momentum, max_param_change, shuffle_buffer_size, num_chunk_per_minibatch_str, frame_subsampling_factor, run_opts, - backstitch_training_scale=0.0, backstitch_training_interval=1): + backstitch_training_scale=0.0, backstitch_training_interval=1, + use_multitask_egs=False): """ Called from train_one_iteration(), this method trains new models with 'num_jobs' jobs, and @@ -140,6 +141,12 @@ def train_new_models(dir, iter, srand, num_jobs, to use for each job is a little complex, so we spawn each one separately. 
this is no longer true for RNNs as we use do not use the --frame option but we use the same script for consistency with FF-DNN code + + use_multitask_egs : True, if different examples used to train multiple + tasks or outputs, e.g.multilingual training. + multilingual egs can be generated using get_egs.sh and + steps/nnet3/multilingual/allocate_multilingual_examples.py, + those are the top-level scripts. """ deriv_time_opts = [] @@ -167,6 +174,12 @@ def train_new_models(dir, iter, srand, num_jobs, frame_shift = ((archive_index + k/num_archives) % frame_subsampling_factor) + multitask_egs_opts = common_train_lib.get_multitask_egs_opts( + egs_dir, + egs_prefix="cegs.", + archive_index=archive_index, + use_multitask_egs=use_multitask_egs) + scp_or_ark = "scp" if use_multitask_egs else "ark" cache_io_opts = (("--read-cache={dir}/cache.{iter}".format(dir=dir, iter=iter) if iter > 0 else "") + @@ -186,9 +199,9 @@ def train_new_models(dir, iter, srand, num_jobs, --backstitch-training-interval={backstitch_training_interval} \ --srand={srand} \ "{raw_model}" {dir}/den.fst \ - "ark,bg:nnet3-chain-copy-egs \ + "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} \ --frame-shift={fr_shft} \ - ark:{egs_dir}/cegs.{archive_index}.ark ark:- | \ + {scp_or_ark}:{egs_dir}/cegs.{archive_index}.{scp_or_ark} ark:- | \ nnet3-chain-shuffle-egs --buffer-size={buf_size} \ --srand={srand} ark:- ark:- | nnet3-chain-merge-egs \ --minibatch-size={num_chunk_per_mb} ark:- ark:- |" \ @@ -210,17 +223,17 @@ def train_new_models(dir, iter, srand, num_jobs, raw_model=raw_model_string, egs_dir=egs_dir, archive_index=archive_index, buf_size=shuffle_buffer_size, - num_chunk_per_mb=num_chunk_per_minibatch_str), + num_chunk_per_mb=num_chunk_per_minibatch_str, + multitask_egs_opts=multitask_egs_opts, + scp_or_ark=scp_or_ark), require_zero_status=True) threads.append(thread) - for thread in threads: thread.join() - def train_one_iteration(dir, iter, srand, egs_dir, num_jobs, num_archives_processed, num_archives, learning_rate, shrinkage_value, @@ -232,7 +245,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, momentum, max_param_change, shuffle_buffer_size, frame_subsampling_factor, run_opts, dropout_edit_string="", - backstitch_training_scale=0.0, backstitch_training_interval=1): + backstitch_training_scale=0.0, backstitch_training_interval=1, + use_multitask_egs=False): """ Called from steps/nnet3/chain/train.py for one iteration for neural network training with LF-MMI objective @@ -264,7 +278,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, compute_train_cv_probabilities( dir=dir, iter=iter, egs_dir=egs_dir, l2_regularize=l2_regularize, xent_regularize=xent_regularize, - leaky_hmm_coefficient=leaky_hmm_coefficient, run_opts=run_opts) + leaky_hmm_coefficient=leaky_hmm_coefficient, run_opts=run_opts, + use_multitask_egs=use_multitask_egs) if iter > 0: # Runs in the background @@ -321,7 +336,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, # first few iterations (hard-coded as 15) backstitch_training_scale=(backstitch_training_scale * iter / 15 if iter < 15 else backstitch_training_scale), - backstitch_training_interval=backstitch_training_interval) + backstitch_training_interval=backstitch_training_interval, + use_multitask_egs=use_multitask_egs) [models_to_average, best_model] = common_train_lib.get_successful_models( num_jobs, '{0}/log/train.{1}.%.log'.format(dir, iter)) @@ -373,7 +389,7 @@ def check_for_required_files(feat_dir, tree_dir, lat_dir): def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, 
run_opts, max_lda_jobs=None, rand_prune=4.0, - lda_opts=None): + lda_opts=None, use_multitask_egs=False): """ Function to estimate and write LDA matrix from cegs This function is exactly similar to the version in module @@ -383,17 +399,28 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, if max_lda_jobs is not None: if num_lda_jobs > max_lda_jobs: num_lda_jobs = max_lda_jobs + multitask_egs_opts = common_train_lib.get_multitask_egs_opts( + egs_dir, + egs_prefix="cegs.", + archive_index="JOB", + use_multitask_egs=use_multitask_egs) + scp_or_ark = "scp" if use_multitask_egs else "ark" + egs_rspecifier = ( + "ark:nnet3-chain-copy-egs {multitask_egs_opts} " + "{scp_or_ark}:{egs_dir}/cegs.JOB.{scp_or_ark} ark:- |" + "".format(egs_dir=egs_dir, scp_or_ark=scp_or_ark, + multitask_egs_opts=multitask_egs_opts)) # Write stats with the same format as stats for LDA. common_lib.execute_command( """{command} JOB=1:{num_lda_jobs} {dir}/log/get_lda_stats.JOB.log \ nnet3-chain-acc-lda-stats --rand-prune={rand_prune} \ - {dir}/init.raw "ark:{egs_dir}/cegs.JOB.ark" \ + {dir}/init.raw "{egs_rspecifier}" \ {dir}/JOB.lda_stats""".format( command=run_opts.command, num_lda_jobs=num_lda_jobs, dir=dir, - egs_dir=egs_dir, + egs_rspecifier=egs_rspecifier, rand_prune=rand_prune)) # the above command would have generated dir/{1..num_lda_jobs}.lda_stats @@ -448,32 +475,50 @@ def prepare_initial_acoustic_model(dir, run_opts, srand=-1): def compute_train_cv_probabilities(dir, iter, egs_dir, l2_regularize, xent_regularize, leaky_hmm_coefficient, - run_opts): + run_opts, + use_multitask_egs=False): model = '{0}/{1}.mdl'.format(dir, iter) + scp_or_ark = "scp" if use_multitask_egs else "ark" + egs_suffix = ".scp" if use_multitask_egs else ".cegs" + + multitask_egs_opts = common_train_lib.get_multitask_egs_opts( + egs_dir, + egs_prefix="valid_diagnostic.", + use_multitask_egs=use_multitask_egs) + common_lib.background_command( """{command} {dir}/log/compute_prob_valid.{iter}.log \ nnet3-chain-compute-prob --l2-regularize={l2} \ --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \ "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \ - "ark,bg:nnet3-chain-copy-egs ark:{egs_dir}/valid_diagnostic.cegs \ + "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} {scp_or_ark}:{egs_dir}/valid_diagnostic{egs_suffix} \ ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \ """.format(command=run_opts.command, dir=dir, iter=iter, model=model, l2=l2_regularize, leaky=leaky_hmm_coefficient, xent_reg=xent_regularize, - egs_dir=egs_dir)) + egs_dir=egs_dir, + multitask_egs_opts=multitask_egs_opts, + scp_or_ark=scp_or_ark, egs_suffix=egs_suffix)) + + multitask_egs_opts = common_train_lib.get_multitask_egs_opts( + egs_dir, + egs_prefix="train_diagnostic.", + use_multitask_egs=use_multitask_egs) common_lib.background_command( """{command} {dir}/log/compute_prob_train.{iter}.log \ nnet3-chain-compute-prob --l2-regularize={l2} \ --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \ "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \ - "ark,bg:nnet3-chain-copy-egs ark:{egs_dir}/train_diagnostic.cegs \ + "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} {scp_or_ark}:{egs_dir}/train_diagnostic{egs_suffix} \ ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \ """.format(command=run_opts.command, dir=dir, iter=iter, model=model, l2=l2_regularize, leaky=leaky_hmm_coefficient, xent_reg=xent_regularize, - egs_dir=egs_dir)) + egs_dir=egs_dir, + multitask_egs_opts=multitask_egs_opts, 
+ scp_or_ark=scp_or_ark, egs_suffix=egs_suffix)) def compute_progress(dir, iter, run_opts): @@ -493,10 +538,12 @@ def compute_progress(dir, iter, run_opts): model=model, prev_model=prev_model)) + def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_str, egs_dir, leaky_hmm_coefficient, l2_regularize, xent_regularize, run_opts, - sum_to_one_penalty=0.0): + sum_to_one_penalty=0.0, + use_multitask_egs=False): """ Function to do model combination In the nnet3 setup, the logic @@ -522,6 +569,14 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st print("{0}: warning: model file {1} does not exist " "(final combination)".format(sys.argv[0], model_file)) + scp_or_ark = "scp" if use_multitask_egs else "ark" + egs_suffix = ".scp" if use_multitask_egs else ".cegs" + + multitask_egs_opts = common_train_lib.get_multitask_egs_opts( + egs_dir, + egs_prefix="combine.", + use_multitask_egs=use_multitask_egs) + # We reverse the order of the raw model strings so that the freshest one # goes first. This is important for systems that include batch # normalization-- it means that the freshest batch-norm stats are used. @@ -539,7 +594,7 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st --sum-to-one-penalty={penalty} \ --enforce-positive-weights=true \ --verbose=3 {dir}/den.fst {raw_models} \ - "ark,bg:nnet3-chain-copy-egs ark:{egs_dir}/combine.cegs ark:- | \ + "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} {scp_or_ark}:{egs_dir}/combine{egs_suffix} ark:- | \ nnet3-chain-merge-egs --minibatch-size={num_chunk_per_mb} \ ark:- ark:- |" - \| \ nnet3-am-copy --set-raw-nnet=- {dir}/{num_iters}.mdl \ @@ -554,7 +609,9 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st penalty=sum_to_one_penalty, num_chunk_per_mb=num_chunk_per_minibatch_str, num_iters=num_iters, - egs_dir=egs_dir)) + egs_dir=egs_dir, + multitask_egs_opts=multitask_egs_opts, + scp_or_ark=scp_or_ark, egs_suffix=egs_suffix)) # Compute the probability of the final, combined model with # the same subset we used for the previous compute_probs, as the @@ -563,4 +620,5 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st dir=dir, iter='final', egs_dir=egs_dir, l2_regularize=l2_regularize, xent_regularize=xent_regularize, leaky_hmm_coefficient=leaky_hmm_coefficient, - run_opts=run_opts) + run_opts=run_opts, + use_multitask_egs=use_multitask_egs) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 559e7498fb7..a3beb2e5bef 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -399,7 +399,7 @@ def verify_egs_dir(egs_dir, feat_dim, ivector_dim, ivector_extractor_id, if (feat_dim != 0 and feat_dim != egs_feat_dim) or (ivector_dim != egs_ivector_dim): raise Exception("There is mismatch between featdim/ivector_dim of " "the current experiment and the provided " - "egs directory") + "egs directory: egs_dim: {0} vs {1} and ivector_dim {2} vs {3}".format(feat_dim, egs_feat_dim, ivector_dim, egs_ivector_dim)) if (((egs_ivector_id is None) and (ivector_extractor_id is not None)) or ((egs_ivector_id is not None) and (ivector_extractor_id is None))): diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index 4d142ba3266..47abec00bde 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ 
b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -322,21 +322,32 @@ def train_one_iteration(dir, iter, srand, egs_dir, def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, max_lda_jobs=None, rand_prune=4.0, - lda_opts=None): + lda_opts=None, use_multitask_egs=False): if max_lda_jobs is not None: if num_lda_jobs > max_lda_jobs: num_lda_jobs = max_lda_jobs + multitask_egs_opts = common_train_lib.get_multitask_egs_opts( + egs_dir, + egs_prefix="egs.", + archive_index="JOB", + use_multitask_egs=use_multitask_egs) + scp_or_ark = "scp" if use_multitask_egs else "ark" + egs_rspecifier = ( + "ark:nnet3-copy-egs {multitask_egs_opts} " + "{scp_or_ark}:{egs_dir}/egs.JOB.{scp_or_ark} ark:- |" + "".format(egs_dir=egs_dir, scp_or_ark=scp_or_ark, + multitask_egs_opts=multitask_egs_opts)) # Write stats with the same format as stats for LDA. common_lib.execute_command( """{command} JOB=1:{num_lda_jobs} {dir}/log/get_lda_stats.JOB.log \ nnet3-acc-lda-stats --rand-prune={rand_prune} \ - {dir}/init.raw "ark:{egs_dir}/egs.JOB.ark" \ + {dir}/init.raw "{egs_rspecifier}" \ {dir}/JOB.lda_stats""".format( command=run_opts.command, num_lda_jobs=num_lda_jobs, dir=dir, - egs_dir=egs_dir, + egs_rspecifier=egs_rspecifier, rand_prune=rand_prune)) # the above command would have generated dir/{1..num_lda_jobs}.lda_stats diff --git a/egs/wsj/s5/steps/nnet3/chain/build_tree_from_lats.sh b/egs/wsj/s5/steps/nnet3/chain/build_tree_from_lats.sh new file mode 100755 index 00000000000..6ed988062b3 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/chain/build_tree_from_lats.sh @@ -0,0 +1,201 @@ +#!/bin/bash +# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). +# Apache 2.0. + + +# This script builds a tree for use in the 'chain' systems (although the script +# itself is pretty generic and doesn't use any 'chain' binaries). This is just +# like the first stages of a standard system, like 'train_sat.sh', except it +# does 'convert-ali' to convert alignments to a monophone topology just created +# from the 'lang' directory (in case the topology is different from where you +# got the system's alignments from), and it stops after the tree-building and +# model-initialization stage, without re-estimating the Gaussians or training +# the transitions. + + +# Begin configuration section. +stage=-5 +exit_stage=-100 # you can use this to require it to exit at the + # beginning of a specific stage. Not all values are + # supported. +cmd=run.pl +context_opts= # e.g. set this to "--context-width 5 --central-position 2" for quinphone. +cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves +frame_subsampling_factor=1 +alignment_subsampling_factor=1 +leftmost_questions_truncate=-1 # note: this used to default to 10, but we never + # use this option now with value != -1, and + # we're changing the default +acwt=0.1 +tree_stats_opts= +cluster_phones_opts= +repeat_frames=false +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# != 5 ]; then + echo "Usage: steps/train_sat.sh <#leaves> " + echo " e.g.: steps/train_sat.sh 2500 15000 data/train_si84 data/lang exp/tri2b_lats_si84 exp/tri3b" + echo "Main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --config # config containing options" + echo " --stage # stage to do partial re-run from." 
+ echo " --repeat-frames # Only affects alignment conversion at" + echo " # the end. If true, generate an " + echo " # alignment using the frame-subsampled " + echo " # topology that is repeated " + echo " # --frame-subsampling-factor times " + echo " # and interleaved, to be the same " + echo " # length as the original alignment " + echo " # (useful for cross-entropy training " + echo " # of reduced frame rate systems)." + exit 1; +fi + +numleaves=$1 +data=$2 +lang=$3 +lat_dir=$4 +dir=$5 + +for f in $data/feats.scp $lang/phones.txt $lat_dir/final.mdl $lat_dir/lat.1.gz; do + [ ! -f $f ] && echo "train_sat.sh: no such file $f" && exit 1; +done + +oov=`cat $lang/oov.int` +nj=`cat $lat_dir/num_jobs` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` +ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1; +sdata=$data/split$nj; +splice_opts=`cat $lat_dir/splice_opts 2>/dev/null` # frame-splicing options. +cmvn_opts=`cat $lat_dir/cmvn_opts 2>/dev/null` +delta_opts=`cat $lat_dir/delta_opts 2>/dev/null` + +mkdir -p $dir/log +cp $lat_dir/splice_opts $dir 2>/dev/null # frame-splicing options. +cp $lat_dir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option. +cp $lat_dir/delta_opts $dir 2>/dev/null # delta option. + +utils/lang/check_phones_compatible.sh $lang/phones.txt $lat_dir/phones.txt || exit 1; +cp $lang/phones.txt $dir || exit 1; + +echo $nj >$dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +# Set up features. + +if [ -f $lat_dir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +## Set up speaker-independent features. +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $lat_dir/final.mat ark:- ark:- |" + cp $lat_dir/final.mat $dir + cp $lat_dir/full.mat $dir 2>/dev/null + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac + +# Add fMLLR transforms if available +if [ -f $lat_dir/trans.1 ]; then + echo "$0: Using transforms from $lat_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$lat_dir/trans.JOB ark:- ark:- |" +fi + +# Do subsampling of feats, if needed +if [ $frame_subsampling_factor -gt 1 ]; then + feats="$feats subsample-feats --n=$frame_subsampling_factor ark:- ark:- |" +fi + +if [ $stage -le -5 ]; then + echo "$0: Initializing monophone model (for alignment conversion, in case topology changed)" + + [ ! -f $lang/phones/sets.int ] && exit 1; + shared_phones_opt="--shared-phones=$lang/phones/sets.int" + # get feature dimension + example_feats="`echo $feats | sed s/JOB/1/g`"; + if ! feat_dim=$(feat-to-dim "$example_feats" - 2>/dev/null) || [ -z $feat_dim ]; then + feat-to-dim "$example_feats" - # to see the error message. + echo "error getting feature dimension" + exit 1; + fi + $cmd JOB=1 $dir/log/init_mono.log \ + gmm-init-mono $shared_phones_opt "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" $lang/topo $feat_dim \ + $dir/mono.mdl $dir/mono.tree || exit 1; +fi + + +if [ $stage -le -4 ]; then + # Get tree stats. 
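+  # The pipeline below takes the best path through each lattice, converts the
+  # resulting alignment to the monophone model/tree initialized above (with
+  # the alignment subsampling factor applied), accumulates tree stats per job,
+  # and then sums them into a single treeacc file.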
+ echo "$0: Accumulating tree stats" + $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ + lattice-best-path --acoustic-scale=$acwt \ + "ark:gunzip -c $lat_dir/lat.JOB.gz|" ark:/dev/null ark:- \| \ + convert-ali --frame-subsampling-factor=$alignment_subsampling_factor \ + $lat_dir/final.mdl $dir/mono.mdl $dir/mono.tree ark:- ark:- \| \ + acc-tree-stats $context_opts $tree_stats_opts --ci-phones=$ciphonelist $dir/mono.mdl \ + "$feats" ark:- $dir/JOB.treeacc || exit 1; + [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-accs" && exit 1; + $cmd $dir/log/sum_tree_acc.log \ + sum-tree-stats $dir/treeacc $dir/*.treeacc || exit 1; + rm $dir/*.treeacc +fi + +if [ $stage -le -3 ] && $train_tree; then + echo "$0: Getting questions for tree clustering." + # preparing questions, roots file... + $cmd $dir/log/questions.log \ + cluster-phones $cluster_phones_opts $context_opts $dir/treeacc \ + $lang/phones/sets.int $dir/questions.int || exit 1; + cat $lang/phones/extra_questions.int >> $dir/questions.int + $cmd $dir/log/compile_questions.log \ + compile-questions --leftmost-questions-truncate=$leftmost_questions_truncate \ + $context_opts $lang/topo $dir/questions.int $dir/questions.qst || exit 1; + + # questions_truncated.int will be needed later on when we build the phone + # language model for 'chain' training. It's a mechanism of keeping the graph + # small. + if [ $leftmost_questions_truncate -gt 0 ]; then + head -n $leftmost_questions_truncate $dir/questions.int > $dir/questions_truncated.int + else + cp $dir/questions.int $dir/questions_truncated.int + fi + + echo "$0: Building the tree" + $cmd $dir/log/build_tree.log \ + build-tree $context_opts --verbose=1 --max-leaves=$numleaves \ + --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ + $dir/questions.qst $lang/topo $dir/tree || exit 1; +fi + +if [ $stage -le -2 ]; then + echo "$0: Initializing the model" + gmm-init-model --write-occs=$dir/1.occs \ + $dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1; + grep 'no stats' $dir/log/init_model.log && echo "This is a bad warning."; + rm $dir/treeacc +fi + +if [ $stage -le -1 ]; then + # Convert the alignments to the new tree. Note: we likely will not use these + # converted alignments in the CTC system directly, but they could be useful + # for other purposes. + echo "$0: Converting alignments from $lat_dir to use current tree" + $cmd JOB=1:$nj $dir/log/convert.JOB.log \ + lattice-best-path --acoustic-scale=$acwt \ + "ark:gunzip -c $lat_dir/lat.JOB.gz |" ark:/dev/null ark:- \| \ + convert-ali --repeat-frames=$repeat_frames \ + --frame-subsampling-factor=$alignment_subsampling_factor \ + $lat_dir/final.mdl $dir/1.mdl $dir/tree \ + ark:- "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +cp $dir/1.mdl $dir/final.mdl + +echo $0: Done building tree + diff --git a/egs/wsj/s5/steps/nnet3/chain/build_tree_multiple_sources.sh b/egs/wsj/s5/steps/nnet3/chain/build_tree_multiple_sources.sh new file mode 100755 index 00000000000..6892a2ff1ee --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/chain/build_tree_multiple_sources.sh @@ -0,0 +1,275 @@ +#!/bin/bash +# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). +# 2017 Vimal Manohar +# Apache 2.0. + +# This script is similar to steps/nnet3/chain/build_tree.sh but supports +# getting statistics from multiple alignment sources. + + +# Begin configuration section. +stage=-5 +exit_stage=-100 # you can use this to require it to exit at the + # beginning of a specific stage. 
Not all values are + # supported. +cmd=run.pl +use_fmllr=true # If true, fmllr transforms will be applied from the alignment directories. + # Otherwise, no fmllr will be applied even if alignment directory contains trans.* +context_opts= # e.g. set this to "--context-width 5 --central-position 2" for quinphone. +cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves +frame_subsampling_factor=1 # frame subsampling factor of output w.r.t. to the input features +tree_stats_opts= +cluster_phones_opts= +repeat_frames=false +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -lt 5 ]; then + echo "Usage: steps/nnet3/chain/build_tree_multiple_sources.sh <#leaves> [ ... ] " + echo " e.g.: steps/nnet3/chain/build_tree_multiple_sources.sh 15000 data/lang data/train_sup exp/tri3_ali data/train_unsup exp/tri3/best_path_train_unsup exp/tree_semi" + echo "Main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --config # config containing options" + echo " --stage # stage to do partial re-run from." + echo " --repeat-frames # Only affects alignment conversion at" + echo " # the end. If true, generate an " + echo " # alignment using the frame-subsampled " + echo " # topology that is repeated " + echo " # --frame-subsampling-factor times " + echo " # and interleaved, to be the same " + echo " # length as the original alignment " + echo " # (useful for cross-entropy training " + echo " # of reduced frame rate systems)." + exit 1; +fi + +numleaves=$1 +lang=$2 +dir=${@: -1} # last argument to the script +shift 2; +data_and_alidirs=( $@ ) # read the remaining arguments into an array +unset data_and_alidirs[${#data_and_alidirs[@]}-1] # 'pop' the last argument which is odir +num_sys=$[${#data_and_alidirs[@]}] # number of systems to combine + +if (( $num_sys % 2 != 0 )); then + echo "$0: The data and alignment arguments must be an even number of arguments." + exit 1 +fi + +num_sys=$((num_sys / 2)) + +data=$dir/data_tmp +mkdir -p $data + +mkdir -p $dir +alidir=`echo ${data_and_alidirs[1]}` + +datadirs=() +alidirs=() +for n in `seq 0 $[num_sys-1]`; do + datadirs[$n]=${data_and_alidirs[$[2*n]]} + alidirs[$n]=${data_and_alidirs[$[2*n+1]]} +done + +utils/combine_data.sh $data ${datadirs[@]} || exit 1 + +for f in $data/feats.scp $lang/phones.txt $alidir/final.mdl $alidir/ali.1.gz; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +oov=`cat $lang/oov.int` +nj=`cat $alidir/num_jobs` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` +ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1; +sdata=$data/split$nj; +splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options. +cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null` || exit 1 +delta_opts=`cat $alidir/delta_opts 2>/dev/null` + +mkdir -p $dir/log +cp $alidir/splice_opts $dir 2>/dev/null # frame-splicing options. +cp $alidir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option. +cp $alidir/delta_opts $dir 2>/dev/null # delta option. + +utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; +cp $lang/phones.txt $dir || exit 1; + +echo $nj >$dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +# Set up features. 
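+# A feature pipeline is built per source (indexed by $n below): CMVN plus
+# either deltas or splice+LDA, to match the alignment directory, optionally
+# followed by that source's fMLLR transforms and by frame subsampling.
+# feats_one[$n] is the same pipeline restricted to split 1 of the data; it is
+# only used to initialize the monophone model.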
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+
+echo "$0: feature type is $feat_type"
+
+feats=()
+feats_one=()
+for n in `seq 0 $[num_sys-1]`; do
+  this_nj=$(cat ${alidirs[$n]}/num_jobs) || exit 1
+  this_sdata=${datadirs[$n]}/split$this_nj
+  [[ -d $this_sdata && ${datadirs[$n]}/feats.scp -ot $this_sdata ]] || split_data.sh ${datadirs[$n]} $this_nj || exit 1;
+  ## Set up speaker-independent features.
+  case $feat_type in
+    delta) feats[$n]="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$this_sdata/JOB/utt2spk scp:$this_sdata/JOB/cmvn.scp scp:$this_sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |"
+      feats_one[$n]="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$this_sdata/1/utt2spk scp:$this_sdata/1/cmvn.scp scp:$this_sdata/1/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |";;
+    lda) feats[$n]="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$this_sdata/JOB/utt2spk scp:$this_sdata/JOB/cmvn.scp scp:$this_sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+      feats_one[$n]="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$this_sdata/1/utt2spk scp:$this_sdata/1/cmvn.scp scp:$this_sdata/1/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+      cp $alidir/final.mat $dir
+      cp $alidir/full.mat $dir 2>/dev/null
+      ;;
+    *) echo "$0: invalid feature type $feat_type" && exit 1;
+  esac
+
+  if $use_fmllr; then
+    if [ ! -f ${alidirs[$n]}/trans.1 ]; then
+      echo "$0: Could not find fMLLR transforms in ${alidirs[$n]}"
+      exit 1
+    fi
+
+    echo "$0: Using transforms from ${alidirs[$n]}"
+    feats[$n]="${feats[$n]} transform-feats --utt2spk=ark:$this_sdata/JOB/utt2spk ark,s,cs:${alidirs[$n]}/trans.JOB ark:- ark:- |"
+    feats_one[$n]="${feats_one[$n]} transform-feats --utt2spk=ark:$this_sdata/1/utt2spk ark,s,cs:${alidirs[$n]}/trans.1 ark:- ark:- |"
+  fi
+
+  # Do subsampling of feats, if needed
+  if [ $frame_subsampling_factor -gt 1 ]; then
+    feats[$n]="${feats[$n]} subsample-feats --n=$frame_subsampling_factor ark:- ark:- |"
+    feats_one[$n]="${feats_one[$n]} subsample-feats --n=$frame_subsampling_factor ark:- ark:- |"
+  fi
+done
+
+if [ $stage -le -5 ]; then
+  echo "$0: Initializing monophone model (for alignment conversion, in case topology changed)"
+
+  [ ! -f $lang/phones/sets.int ] && exit 1;
+  shared_phones_opt="--shared-phones=$lang/phones/sets.int"
+  # get feature dimension
+  example_feats="`echo ${feats[0]} | sed s/JOB/1/g`";
+  if ! feat_dim=$(feat-to-dim "$example_feats" - 2>/dev/null) || [ -z $feat_dim ]; then
+    feat-to-dim "$example_feats" - # to see the error message.
+    echo "error getting feature dimension"
+    exit 1;
+  fi
+
+  for n in `seq 0 $[num_sys-1]`; do
+    copy-feats "${feats_one[$n]}" ark:-
+  done | copy-feats ark:- ark:$dir/tmp.ark
+
+  $cmd $dir/log/init_mono.log \
+    gmm-init-mono $shared_phones_opt \
+      "--train-feats=ark:subset-feats --n=10 ark:$dir/tmp.ark ark:- |" $lang/topo $feat_dim \
+      $dir/mono.mdl $dir/mono.tree || exit 1
+fi
+
+
+if [ $stage -le -4 ]; then
+  # Get tree stats.
+ + for n in `seq 0 $[num_sys-1]`; do + echo "$0: Accumulating tree stats" + this_data=${datadirs[$n]} + this_alidir=${alidirs[$n]} + this_nj=$(cat $this_alidir/num_jobs) || exit 1 + this_frame_subsampling_factor=1 + if [ -f $this_alidir/frame_subsampling_factor ]; then + this_frame_subsampling_factor=$(cat $this_alidir/frame_subsampling_factor) + fi + + if (( $frame_subsampling_factor % $this_frame_subsampling_factor != 0 )); then + echo "$0: frame-subsampling-factor=$frame_subsampling_factor is not " + echo "divisible by $this_frame_subsampling_factor (that of $this_alidir)" + exit 1 + fi + + this_frame_subsampling_factor=$((frame_subsampling_factor / this_frame_subsampling_factor)) + $cmd JOB=1:$this_nj $dir/log/acc_tree.$n.JOB.log \ + convert-ali --frame-subsampling-factor=$this_frame_subsampling_factor \ + $this_alidir/final.mdl $dir/mono.mdl $dir/mono.tree "ark:gunzip -c $this_alidir/ali.JOB.gz|" ark:- \| \ + acc-tree-stats $context_opts $tree_stats_opts --ci-phones=$ciphonelist $dir/mono.mdl \ + "${feats[$n]}" ark:- $dir/$n.JOB.treeacc || exit 1; + [ "`ls $dir/$n.*.treeacc | wc -w`" -ne "$this_nj" ] && echo "$0: Wrong #tree-accs for data $n $this_data" && exit 1; + done + + $cmd $dir/log/sum_tree_acc.log \ + sum-tree-stats $dir/treeacc $dir/*.treeacc || exit 1; + rm $dir/*.treeacc +fi + +if [ $stage -le -3 ] && $train_tree; then + echo "$0: Getting questions for tree clustering." + # preparing questions, roots file... + $cmd $dir/log/questions.log \ + cluster-phones $cluster_phones_opts $context_opts $dir/treeacc \ + $lang/phones/sets.int $dir/questions.int || exit 1; + cat $lang/phones/extra_questions.int >> $dir/questions.int + $cmd $dir/log/compile_questions.log \ + compile-questions \ + $context_opts $lang/topo $dir/questions.int $dir/questions.qst || exit 1; + + echo "$0: Building the tree" + $cmd $dir/log/build_tree.log \ + build-tree $context_opts --verbose=1 --max-leaves=$numleaves \ + --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ + $dir/questions.qst $lang/topo $dir/tree || exit 1; +fi + +if [ $stage -le -2 ]; then + echo "$0: Initializing the model" + gmm-init-model --write-occs=$dir/1.occs \ + $dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1; + grep 'no stats' $dir/log/init_model.log && echo "This is a bad warning."; + rm $dir/treeacc +fi + +if [ $stage -le -1 ]; then + # Convert the alignments to the new tree. Note: we likely will not use these + # converted alignments in the CTC system directly, but they could be useful + # for other purposes. 
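+  # Each alignment source is converted separately: the conversion uses a
+  # per-source subsampling factor equal to the overall frame-subsampling
+  # factor divided by the factor the source alignments were produced with,
+  # and writes ark,scp outputs so the alignments can later be merged and
+  # re-split to match the combined data's job split below.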
+ + for n in `seq 0 $[num_sys-1]`; do + this_alidir=${alidirs[$n]} + this_nj=$(cat $this_alidir/num_jobs) || exit 1 + + this_frame_subsampling_factor=1 + if [ -f $this_alidir/frame_subsampling_factor ]; then + this_frame_subsampling_factor=$(cat $this_alidir/frame_subsampling_factor) + fi + + if (( $frame_subsampling_factor % $this_frame_subsampling_factor != 0 )); then + echo "$0: frame-subsampling-factor=$frame_subsampling_factor is not " + echo "divisible by $this_frame_subsampling_factor (hat of $this_alidir)" + exit 1 + fi + + echo "$0: frame-subsampling-factor for $this_alidir is $this_frame_subsampling_factor" + + this_frame_subsampling_factor=$((frame_subsampling_factor / this_frame_subsampling_factor)) + echo "$0: Converting alignments from $this_alidir to use current tree" + $cmd JOB=1:$this_nj $dir/log/convert.$n.JOB.log \ + convert-ali --repeat-frames=$repeat_frames \ + --frame-subsampling-factor=$this_frame_subsampling_factor \ + $this_alidir/final.mdl $dir/1.mdl $dir/tree "ark:gunzip -c $this_alidir/ali.JOB.gz |" \ + ark,scp:$dir/ali.$n.JOB.ark,$dir/ali.$n.JOB.scp || exit 1 + + for i in `seq $this_nj`; do + cat $dir/ali.$n.$i.scp + done > $dir/ali.$n.scp || exit 1 + done + + for n in `seq 0 $[num_sys-1]`; do + cat $dir/ali.$n.scp + done | sort -k1,1 > $dir/ali.scp || exit 1 + + utils/split_data.sh $data $nj + $cmd JOB=1:$nj $dir/log/copy_alignments.JOB.log \ + copy-int-vector "scp:utils/filter_scp.pl $data/split$nj/JOB/utt2spk $dir/ali.scp |" \ + "ark:| gzip -c > $dir/ali.JOB.gz" || exit 1 +fi + +cp $dir/1.mdl $dir/final.mdl + +echo $0: Done building tree diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index cec6f8e166f..f3202778daa 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -63,6 +63,17 @@ online_ivector_dir= # can be used if we are including speaker information as iV cmvn_opts= # can be used for specifying CMVN options, if feature type is not lda (if lda, # it doesn't make sense to use different options than were used as input to the # LDA transform). This is used to turn off CMVN in the online-nnet experiments. +lattice_lm_scale= # If supplied, the graph/lm weight of the lattices will be + # used (with this scale) in generating supervisions +egs_weight=1.0 # The weight which determines how much each training example + # contributes to gradients while training (can be used + # to down/up-weight a dataset) +lattice_prune_beam= # If supplied, the lattices will be pruned to this beam, + # before being used to get supervisions. +acwt=0.1 # For pruning +phone_insertion_penalty= +deriv_weights_scp= +generate_egs_scp=false echo "$0 $@" # Print the command line for logging @@ -184,6 +195,8 @@ if [ -f $dir/trans.scp ]; then train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk scp:$dir/trans.scp ark:- ark:- |" fi +tree-info $chaindir/tree | grep num-pdfs | awk '{print $2}' > $dir/info/num_pdfs || exit 1 + if [ ! -z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim @@ -260,8 +273,10 @@ fi if [ $stage -le 2 ]; then echo "$0: copying training lattices" + [ ! 
-z $lattice_prune_beam ] && \ + prune_cmd="ark:- | lattice-prune --acoustic-scale=$acwt --beam=$lattice_prune_beam ark:-" $cmd --max-jobs-run 6 JOB=1:$num_lat_jobs $dir/log/lattice_copy.JOB.log \ - lattice-copy "ark:gunzip -c $latdir/lat.JOB.gz|" ark,scp:$dir/lat.JOB.ark,$dir/lat.JOB.scp || exit 1; + lattice-copy "ark:gunzip -c $latdir/lat.JOB.gz|" $prune_cmd ark,scp:$dir/lat.JOB.ark,$dir/lat.JOB.scp || exit 1; for id in $(seq $num_lat_jobs); do cat $dir/lat.$id.scp; done > $dir/lat.scp fi @@ -271,6 +286,7 @@ egs_opts="--left-context=$left_context --right-context=$right_context --num-fram [ $left_context_initial -ge 0 ] && egs_opts="$egs_opts --left-context-initial=$left_context_initial" [ $right_context_final -ge 0 ] && egs_opts="$egs_opts --right-context-final=$right_context_final" +[ ! -z "$deriv_weights_scp" ] && egs_opts="$egs_opts --deriv-weights-rspecifier=scp:$deriv_weights_scp" chain_supervision_all_opts="--lattice-input=true --frame-subsampling-factor=$alignment_subsampling_factor" [ ! -z $right_tolerance ] && \ @@ -279,6 +295,20 @@ chain_supervision_all_opts="--lattice-input=true --frame-subsampling-factor=$ali [ ! -z $left_tolerance ] && \ chain_supervision_all_opts="$chain_supervision_all_opts --left-tolerance=$left_tolerance" +normalization_scale=1.0 +if [ ! -z "$lattice_lm_scale" ]; then + chain_supervision_all_opts="$chain_supervision_all_opts --lm-scale=$lattice_lm_scale" + normalization_scale=$(perl -e " + if ($lattice_lm_scale >= 1.0 || $lattice_lm_scale < 0) { + print STDERR \"Invalid --lattice-lm-scale $lattice_lm_scale\"; + exit(1); + } + print (1.0 - $lattice_lm_scale);") +fi + +[ ! -z $phone_insertion_penalty ] && \ + chain_supervision_all_opts="$chain_supervision_all_opts --phone-ins-penalty=$phone_insertion_penalty" + echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context echo $left_context_initial > $dir/info/left_context_initial @@ -299,7 +329,7 @@ if [ $stage -le 3 ]; then chain-get-supervision $chain_supervision_all_opts $chaindir/tree $chaindir/0.trans_mdl \ ark:- ark:- \| \ nnet3-chain-get-egs $ivector_opts --srand=$srand \ - $egs_opts $chaindir/normalization.fst \ + $egs_opts --normalization-scale=$normalization_scale $chaindir/normalization.fst \ "$valid_feats" ark,s,cs:- "ark:$dir/valid_all.cegs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ utils/filter_scp.pl $dir/train_subset_uttlist $dir/lat_special.scp \| \ @@ -307,27 +337,40 @@ if [ $stage -le 3 ]; then chain-get-supervision $chain_supervision_all_opts \ $chaindir/tree $chaindir/0.trans_mdl ark:- ark:- \| \ nnet3-chain-get-egs $ivector_opts --srand=$srand \ - $egs_opts $chaindir/normalization.fst \ + $egs_opts --normalization-scale=$normalization_scale $chaindir/normalization.fst \ "$train_subset_feats" ark,s,cs:- "ark:$dir/train_subset_all.cegs" || touch $dir/.error & wait; [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 echo "... Getting subsets of validation examples for diagnostics and combination." 
+ if $generate_egs_scp; then + valid_diagnostic_output="ark,scp:$dir/valid_diagnostic.cegs,$dir/valid_diagnostic.scp" + train_diagnostic_output="ark,scp:$dir/train_diagnostic.cegs,$dir/train_diagnostic.scp" + else + valid_diagnostic_output="ark:$dir/valid_diagnostic.cegs" + train_diagnostic_output="ark:$dir/train_diagnostic.cegs" + fi $cmd $dir/log/create_valid_subset_combine.log \ nnet3-chain-subset-egs --n=$num_valid_egs_combine ark:$dir/valid_all.cegs \ ark:$dir/valid_combine.cegs || touch $dir/.error & $cmd $dir/log/create_valid_subset_diagnostic.log \ nnet3-chain-subset-egs --n=$num_egs_diagnostic ark:$dir/valid_all.cegs \ - ark:$dir/valid_diagnostic.cegs || touch $dir/.error & + $valid_diagnostic_output || touch $dir/.error & $cmd $dir/log/create_train_subset_combine.log \ nnet3-chain-subset-egs --n=$num_train_egs_combine ark:$dir/train_subset_all.cegs \ ark:$dir/train_combine.cegs || touch $dir/.error & $cmd $dir/log/create_train_subset_diagnostic.log \ nnet3-chain-subset-egs --n=$num_egs_diagnostic ark:$dir/train_subset_all.cegs \ - ark:$dir/train_diagnostic.cegs || touch $dir/.error & + $train_diagnostic_output || touch $dir/.error & wait sleep 5 # wait for file system to sync. - cat $dir/valid_combine.cegs $dir/train_combine.cegs > $dir/combine.cegs + if $generate_egs_scp; then + cat $dir/valid_combine.cegs $dir/train_combine.cegs | \ + nnet3-chain-copy-egs ark:- ark,scp:$dir/combine.cegs,$dir/combine.scp + rm $dir/{train,valid}_combine.scp + else + cat $dir/valid_combine.cegs $dir/train_combine.cegs > $dir/combine.cegs + fi for f in $dir/{combine,train_diagnostic,valid_diagnostic}.cegs; do [ ! -s $f ] && echo "No examples in file $f" && exit 1; @@ -357,6 +400,7 @@ if [ $stage -le 4 ]; then utils/filter_scp.pl $sdata/JOB/utt2spk $dir/lat.scp \| \ lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:- ark:- \| \ chain-get-supervision $chain_supervision_all_opts \ + --weight=$egs_weight \ $chaindir/tree $chaindir/0.trans_mdl ark:- ark:- \| \ nnet3-chain-get-egs $ivector_opts --srand=\$[JOB+$srand] $egs_opts \ --num-frames-overlap=$frames_overlap_per_eg \ @@ -376,16 +420,34 @@ if [ $stage -le 5 ]; then done if [ $archives_multiple == 1 ]; then # normal case. + if $generate_egs_scp; then + output_archive="ark,scp:$dir/cegs.JOB.ark,$dir/cegs.JOB.scp" + else + output_archive="ark:$dir/cegs.JOB.ark" + fi $cmd --max-jobs-run $max_shuffle_jobs_run --mem 8G JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \ - nnet3-chain-normalize-egs $chaindir/normalization.fst "ark:cat $egs_list|" ark:- \| \ - nnet3-chain-shuffle-egs --srand=\$[JOB+$srand] ark:- ark:$dir/cegs.JOB.ark || exit 1; + nnet3-chain-normalize-egs --normalization-scale=$normalization_scale $chaindir/normalization.fst "ark:cat $egs_list|" ark:- \| \ + nnet3-chain-shuffle-egs --srand=\$[JOB+$srand] ark:- $output_archive || exit 1; + + if $generate_egs_scp; then + #concatenate cegs.JOB.scp in single cegs.scp + rm -rf $dir/cegs.scp + for j in $(seq $num_archives_intermediate); do + cat $dir/cegs.$j.scp || exit 1; + done > $dir/cegs.scp || exit 1; + for f in $dir/cegs.*.scp; do rm $f; done + fi else # we need to shuffle the 'intermediate archives' and then split into the # final archives. we create soft links to manage this splitting, because # otherwise managing the output names is quite difficult (and we don't want # to submit separate queue jobs for each intermediate archive, because then # the --max-jobs-run option is hard to enforce). 
-  output_archives="$(for y in $(seq $archives_multiple); do echo ark:$dir/cegs.JOB.$y.ark; done)"
+  if $generate_egs_scp; then
+    output_archives="$(for y in $(seq $archives_multiple); do echo ark,scp:$dir/cegs.JOB.$y.ark,$dir/cegs.JOB.$y.scp; done)"
+  else
+    output_archives="$(for y in $(seq $archives_multiple); do echo ark:$dir/cegs.JOB.$y.ark; done)"
+  fi
  for x in $(seq $num_archives_intermediate); do
    for y in $(seq $archives_multiple); do
      archive_index=$[($x-1)*$archives_multiple+$y]
@@ -394,9 +456,20 @@ if [ $stage -le 5 ]; then
    done
  done
  $cmd --max-jobs-run $max_shuffle_jobs_run --mem 8G JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \
-    nnet3-chain-normalize-egs $chaindir/normalization.fst "ark:cat $egs_list|" ark:- \| \
+    nnet3-chain-normalize-egs --normalization-scale=$normalization_scale $chaindir/normalization.fst "ark:cat $egs_list|" ark:- \| \
    nnet3-chain-shuffle-egs --srand=\$[JOB+$srand] ark:- ark:- \| \
    nnet3-chain-copy-egs ark:- $output_archives || exit 1;
+
+  if $generate_egs_scp; then
+    #concatenate cegs.JOB.scp in single cegs.scp
+    rm -rf $dir/cegs.scp
+    for j in $(seq $num_archives_intermediate); do
+      for y in $(seq $archives_multiple); do
+        cat $dir/cegs.$j.$y.scp || exit 1;
+      done
+    done > $dir/cegs.scp || exit 1;
+    for f in $dir/cegs.*.*.scp; do rm $f; done
+  fi
  fi
fi
diff --git a/egs/wsj/s5/steps/nnet3/chain/make_den_fst.sh b/egs/wsj/s5/steps/nnet3/chain/make_den_fst.sh
new file mode 100755
index 00000000000..3467e887cd5
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/chain/make_den_fst.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+# Copyright 2014-17 Vimal Manohar
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This script creates the denominator FST (den.fst) and normalization.fst for
+# chain training. It additionally copies the transition model and tree from the
+# first alignment directory to the chain directory.
+# This script can accept multiple sources of alignments that can be
+# weighted to estimate the phone LM.
+
+set -o pipefail
+
+# begin configuration section.
+cmd=run.pl
+stage=-10
+weights=
+lm_opts=   # options passed to chain-est-phone-lm
+#end configuration section.
+
+help_message="Usage: "$(basename $0)" [options] <ali-dir1> [<ali-dir2> ...] <out-dir>
+ E.g. "$(basename $0)" exp/tri1_ali exp/tri2_ali exp/chain/tdnn_1a_sp
+Options:
+  --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
+";
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -lt 2 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+dir=${@: -1}  # last argument to the script
+ali_dirs=( $@ )  # read the remaining arguments into an array
+unset ali_dirs[${#ali_dirs[@]}-1]  # 'pop' the last argument which is odir
+num_sys=${#ali_dirs[@]}  # number of systems to combine
+
+mkdir -p $dir/log
+
+ali_dir=`echo ${ali_dirs[0]} | cut -d: -f1`
+
+for f in $ali_dir/ali.1.gz $ali_dir/final.mdl $ali_dir/tree; do
+  if [ !
-f $f ]; then + echo "$0: Could not find file $f" + exit 1 + fi +done + +cp $ali_dir/tree $dir/ || exit 1 + +for n in `seq 0 $[num_sys-1]`; do + adir=${ali_dirs[$n]} + alignments+=("ark:gunzip -c $adir/ali.*.gz | ali-to-phones $adir/final.mdl ark:- ark:- |") +done + +if [ $stage -le 1 ]; then + $cmd $dir/log/make_phone_lm.log \ + chain-est-phone-lm $lm_opts --scales="$weights" \ + "${alignments[@]}" $dir/phone_lm.fst || exit 1 +fi + +if [ $stage -le 2 ]; then + copy-transition-model $ali_dir/final.mdl $dir/0.trans_mdl +fi + +if [ $stage -le 3 ]; then + $cmd $dir/log/make_den_fst.log \ + chain-make-den-fst $dir/tree $dir/0.trans_mdl \ + $dir/phone_lm.fst \ + $dir/den.fst $dir/normalization.fst || exit 1 +fi + +exit 0 diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 6f9452c457c..c611d10edb1 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -377,13 +377,23 @@ def train(args, run_opts): logger.info("Copying the properties from {0} to {1}".format(egs_dir, args.dir)) common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) + if not os.path.exists('{0}/valid_diagnostic.cegs'.format(egs_dir)): + if (not os.path.exists('{0}/valid_diagnostic.scp'.format(egs_dir))): + raise Exception('neither {0}/valid_diagnostic.cegs nor ' + '{0}/valid_diagnostic.scp exist.' + 'This script expects one of them.'.format(egs_dir)) + use_multitask_egs = True + else: + use_multitask_egs = False + if (args.stage <= -2) and os.path.exists(args.dir+"/configs/init.config"): logger.info('Computing the preconditioning matrix for input features') chain_lib.compute_preconditioning_matrix( args.dir, egs_dir, num_archives, run_opts, max_lda_jobs=args.max_lda_jobs, - rand_prune=args.rand_prune) + rand_prune=args.rand_prune, + use_multitask_egs=use_multitask_egs) if (args.stage <= -1): logger.info("Preparing the initial acoustic model.") @@ -477,7 +487,8 @@ def train(args, run_opts): frame_subsampling_factor=args.frame_subsampling_factor, run_opts=run_opts, backstitch_training_scale=args.backstitch_training_scale, - backstitch_training_interval=args.backstitch_training_interval) + backstitch_training_interval=args.backstitch_training_interval, + use_multitask_egs=use_multitask_egs) if args.cleanup: # do a clean up everythin but the last 2 models, under certain @@ -512,7 +523,8 @@ def train(args, run_opts): l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, run_opts=run_opts, - sum_to_one_penalty=args.combine_sum_to_one_penalty) + sum_to_one_penalty=args.combine_sum_to_one_penalty, + use_multitask_egs=use_multitask_egs) else: logger.info("Copying the last-numbered model to final.mdl") common_lib.force_symlink("{0}.mdl".format(num_iters), diff --git a/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py b/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py index 6372ba25e5e..860c444e342 100755 --- a/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py +++ b/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py @@ -98,6 +98,13 @@ def get_args(): parser.add_argument("--samples-per-iter", type=int, default=40000, help="The target number of egs in each archive of egs, " "(prior to merging egs). 
") + parser.add_argument("--frames-per-iter", type=int, default=400000, + help="The target number of frames in each archive of " + "egs") + parser.add_argument("--frames-per-eg-list", type=str, default=None, + action=common_lib.NullstrToNoneAction, + help="Number of frames per eg for each input language " + "as a comma separated list") parser.add_argument("--num-jobs", type=int, default=20, help="This can be used for better randomization in distributing " "examples for different languages across egs.*.scp files, " @@ -107,7 +114,7 @@ def get_args(): help="If true, egs.ranges.*.txt are generated " "randomly w.r.t distribution of remaining examples in " "each language, otherwise it is generated sequentially.", - default=True, choices = ["false", "true"]) + default=True, choices=["false", "true"]) parser.add_argument("--max-archives", type=int, default=1000, help="max number of archives used to generate egs.*.scp") parser.add_argument("--seed", type=int, default=1, @@ -129,7 +136,7 @@ def get_args(): # now the positional arguments parser.add_argument("egs_scp_lists", nargs='+', help="list of egs.scp files per input language." - "e.g. exp/lang1/egs/egs.scp exp/lang2/egs/egs.scp") + "e.g. exp/lang1/egs/egs.scp exp/lang2/egs/egs.scp") parser.add_argument("egs_dir", help="Name of egs directory e.g. exp/tdnn_multilingual_sp/egs") @@ -153,7 +160,7 @@ def select_random_lang(lang_len, tot_egs, random_selection): count = 0 for l in range(len(lang_len)): if random_selection: - if rand_int <= (count + lang_len[l]): + if rand_int <= (count + lang_len[l]): return l else: count += lang_len[l] @@ -172,6 +179,10 @@ def process_multilingual_egs(args): scp_lists = args.egs_scp_lists num_langs = len(scp_lists) + frames_per_eg = ([1 for x in scp_lists] + if args.frames_per_eg_list is None + else [int(x) for x in args.frames_per_eg_list.split(',')]) + scp_files = [open(scp_lists[lang], 'r') for lang in range(num_langs)] lang2len = [0] * num_langs @@ -182,7 +193,7 @@ def process_multilingual_egs(args): # If weights are not provided, the weights are 1.0. if args.lang2weight is None: - lang2weight = [ 1.0 ] * num_langs + lang2weight = [1.0] * num_langs else: lang2weight = args.lang2weight.split(",") assert(len(lang2weight) == num_langs) @@ -195,10 +206,16 @@ def process_multilingual_egs(args): # Each element of all_egs (one per num_archive * num_jobs) is # an array of 3-tuples (lang-id, local-start-egs-line, num-egs) all_egs = [] - lang_len = lang2len[:] - # total num of egs in all languages - tot_num_egs = sum(lang2len[i] for i in range(len(lang2len))) - num_archives = max(1, min(args.max_archives, tot_num_egs / args.samples_per_iter)) + num_frames_in_lang = [frames_per_eg[i] * lang2len[i] + for i in range(num_langs)] + for lang in range(num_langs): + logger.info("Number of frames for language {0} " + "is {1}.".format(lang, num_frames_in_lang[lang])) + + # total num of frames in all languages + tot_num_frames = sum(num_frames_in_lang[i] for i in range(num_langs)) + num_archives = max(1, min(args.max_archives, + tot_num_frames / args.frames_per_iter)) num_arch_file = open("{0}/info/{1}num_archives".format( args.egs_dir, @@ -206,7 +223,7 @@ def process_multilingual_egs(args): "w") print("{0}".format(num_archives), file=num_arch_file) num_arch_file.close() - this_num_egs_per_archive = tot_num_egs / (num_archives * args.num_jobs) + this_num_frames_per_archive = tot_num_frames / (num_archives * args.num_jobs) logger.info("Generating {0}scp.. 
temporary files used to " "generate {0}.scp.".format(args.egs_prefix)) @@ -216,29 +233,36 @@ def process_multilingual_egs(args): "".format(args.egs_dir, args.egs_prefix, job + 1, archive_index + 1), "w") - this_egs = [] # this will be array of 2-tuples (lang-id start-frame num-frames) + # this will be array of 2-tuples (lang-id start-frame num-frames) + this_egs = [] num_egs = 0 - while num_egs <= this_num_egs_per_archive: - num_left_egs = sum(num_left_egs_per_lang for - num_left_egs_per_lang in lang_len) - if num_left_egs > 0: - lang_id = select_random_lang(lang_len, num_left_egs, rand_select) - start_egs = lang2len[lang_id] - lang_len[lang_id] + num_frames = 0 + while num_frames <= this_num_frames_per_archive: + num_frames_left = sum(num_frames_in_lang) + if num_frames_left > 0: + lang_id = select_random_lang(num_frames_in_lang, + num_frames_left, rand_select) + start_egs = ( + lang2len[lang_id] + - num_frames_in_lang[lang_id] / frames_per_eg[lang_id]) this_egs.append((lang_id, start_egs, args.minibatch_size)) for scpline in range(args.minibatch_size): scp_key = scp_files[lang_id].readline().splitlines()[0] print("{0} {1}".format(scp_key, lang_id), file=archfile) - lang_len[lang_id] = lang_len[lang_id] - args.minibatch_size - num_egs = num_egs + args.minibatch_size + num_frames_in_lang[lang_id] -= ( + args.minibatch_size * frames_per_eg[lang_id]) + num_egs += args.minibatch_size + num_frames += args.minibatch_size * frames_per_eg[lang_id] # If num of remaining egs in each lang is less than minibatch_size, # they are discarded. - if lang_len[lang_id] < args.minibatch_size: - lang_len[lang_id] = 0 - logger.info("Done processing data for language {0}".format( - lang_id)) + if (num_frames_in_lang[lang_id] + < args.minibatch_size * frames_per_eg[lang_id]): + num_frames_in_lang[lang_id] = 0 + logger.info("Done processing data for language {0}" + "".format(lang_id)) else: logger.info("Done processing data for all languages.") break @@ -315,4 +339,4 @@ def main(): if __name__ == "__main__": - main() + main() diff --git a/egs/wsj/s5/steps/nnet3/multilingual/combine_egs.sh b/egs/wsj/s5/steps/nnet3/multilingual/combine_egs.sh index 3826dad11a9..dd8d9714905 100755 --- a/egs/wsj/s5/steps/nnet3/multilingual/combine_egs.sh +++ b/egs/wsj/s5/steps/nnet3/multilingual/combine_egs.sh @@ -19,13 +19,15 @@ minibatch_size=512 # it is the number of consecutive egs that we take from # access. This does not have to be the actual minibatch size; num_jobs=10 # helps for better randomness across languages # per archive. -samples_per_iter=400000 # this is the target number of egs in each archive of egs +frames_per_iter=400000 # this is the target number of egs in each archive of egs # (prior to merging egs). We probably should have called # it egs_per_iter. This is just a guideline; it will pick # a number that divides the number of samples in the # entire data. lang2weight= # array of weights one per input languge to scale example's output # w.r.t its input language during training. +allocate_opts= +egs_prefix=egs. stage=0 echo "$0 $@" # Print the command line for logging @@ -33,6 +35,12 @@ echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; +if [ $# -lt 3 ]; then + echo "Usage:$0 [opts] ... 
" + echo "Usage:$0 [opts] 2 exp/lang1/egs exp/lang2/egs exp/multi/egs" + exit 1; +fi + num_langs=$1 shift 1 @@ -47,7 +55,8 @@ if [ ${#args[@]} != $[$num_langs+1] ]; then exit 1; fi -required="egs.scp combine.scp train_diagnostic.scp valid_diagnostic.scp" +required="${egs_prefix}scp combine.scp train_diagnostic.scp valid_diagnostic.scp" +frames_per_eg_list= train_scp_list= train_diagnostic_scp_list= valid_diagnostic_scp_list= @@ -55,13 +64,14 @@ combine_scp_list= # read paramter from $egs_dir[0]/info and cmvn_opts # to write in multilingual egs_dir. -check_params="info/feat_dim info/ivector_dim info/left_context info/right_context info/frames_per_eg cmvn_opts" +check_params="info/feat_dim info/ivector_dim info/left_context info/right_context cmvn_opts" ivec_dim=`cat ${args[0]}/info/ivector_dim` if [ $ivec_dim -ne 0 ];then check_params="$check_params info/final.ie.id"; fi for param in $check_params; do - cat ${args[0]}/$param > $megs_dir/$param || exit 1; + cat ${args[0]}/$param > $megs_dir/$param || exit 1; done +cat ${args[0]}/cmvn_opts > $megs_dir/cmvn_opts || exit 1; # caution: the top-level nnet training for lang in $(seq 0 $[$num_langs-1]);do multi_egs_dir[$lang]=${args[$lang]} @@ -70,10 +80,19 @@ for lang in $(seq 0 $[$num_langs-1]);do echo "$0: no such file ${multi_egs_dir[$lang]}/$f." && exit 1; fi done - train_scp_list="$train_scp_list ${args[$lang]}/egs.scp" + train_scp_list="$train_scp_list ${args[$lang]}/${egs_prefix}scp" train_diagnostic_scp_list="$train_diagnostic_scp_list ${args[$lang]}/train_diagnostic.scp" valid_diagnostic_scp_list="$valid_diagnostic_scp_list ${args[$lang]}/valid_diagnostic.scp" combine_scp_list="$combine_scp_list ${args[$lang]}/combine.scp" + + this_frames_per_eg=$(cat ${args[$lang]}/info/frames_per_eg) + + if [ $lang -eq 0 ]; then + frames_per_eg_list="$this_frames_per_eg" + echo $this_frames_per_eg > $megs_dir/info/frames_per_eg + else + frames_per_eg_list="$frames_per_eg_list,$this_frames_per_eg" + fi # check parameter dimension to be the same in all egs dirs for f in $check_params; do @@ -90,16 +109,18 @@ for lang in $(seq 0 $[$num_langs-1]);do done done +if [ ! -z "$lang2weight" ]; then + egs_opt="--lang2weight '$lang2weight'" +fi + if [ $stage -le 0 ]; then echo "$0: allocating multilingual examples for training." - if [ ! -z "$lang2weight" ]; then - egs_opt="--lang2weight '$lang2weight'" - fi - # Generate egs.*.scp for multilingual setup. + # Generate ${egs_prefix}*.scp for multilingual setup. $cmd $megs_dir/log/allocate_multilingual_examples_train.log \ steps/nnet3/multilingual/allocate_multilingual_examples.py $egs_opt \ - --minibatch-size $minibatch_size \ - --samples-per-iter $samples_per_iter \ + ${allocate_opts} --minibatch-size $minibatch_size \ + --frames-per-iter $frames_per_iter --frames-per-eg-list $frames_per_eg_list \ + --egs-prefix "$egs_prefix" \ $train_scp_list $megs_dir || exit 1; fi @@ -107,20 +128,20 @@ if [ $stage -le 1 ]; then echo "$0: combine combine.scp examples from all langs in $megs_dir/combine.scp." # Generate combine.scp for multilingual setup. $cmd $megs_dir/log/allocate_multilingual_examples_combine.log \ - steps/nnet3/multilingual/allocate_multilingual_examples.py \ - --random-lang false \ - --max-archives 1 --num-jobs 1 \ - --minibatch-size $minibatch_size \ + steps/nnet3/multilingual/allocate_multilingual_examples.py $egs_opt \ + --random-lang false --max-archives 1 --num-jobs 1 \ + --frames-per-eg-list $frames_per_eg_list \ + ${allocate_opts} --minibatch-size $minibatch_size \ --egs-prefix "combine." 
\ $combine_scp_list $megs_dir || exit 1; echo "$0: combine train_diagnostic.scp examples from all langs in $megs_dir/train_diagnostic.scp." # Generate train_diagnostic.scp for multilingual setup. $cmd $megs_dir/log/allocate_multilingual_examples_train_diagnostic.log \ - steps/nnet3/multilingual/allocate_multilingual_examples.py \ - --random-lang false \ - --max-archives 1 --num-jobs 1 \ - --minibatch-size $minibatch_size \ + steps/nnet3/multilingual/allocate_multilingual_examples.py $egs_opt \ + --random-lang false --max-archives 1 --num-jobs 1 \ + --frames-per-eg-list $frames_per_eg_list \ + ${allocate_opts} --minibatch-size $minibatch_size \ --egs-prefix "train_diagnostic." \ $train_diagnostic_scp_list $megs_dir || exit 1; @@ -128,9 +149,10 @@ if [ $stage -le 1 ]; then echo "$0: combine valid_diagnostic.scp examples from all langs in $megs_dir/valid_diagnostic.scp." # Generate valid_diagnostic.scp for multilingual setup. $cmd $megs_dir/log/allocate_multilingual_examples_valid_diagnostic.log \ - steps/nnet3/multilingual/allocate_multilingual_examples.py \ + steps/nnet3/multilingual/allocate_multilingual_examples.py $egs_opt \ --random-lang false --max-archives 1 --num-jobs 1\ - --minibatch-size $minibatch_size \ + --frames-per-eg-list $frames_per_eg_list \ + ${allocate_opts} --minibatch-size $minibatch_size \ --egs-prefix "valid_diagnostic." \ $valid_diagnostic_scp_list $megs_dir || exit 1; @@ -140,6 +162,6 @@ for egs_type in combine train_diagnostic valid_diagnostic; do mv $megs_dir/${egs_type}.weight.1.ark $megs_dir/${egs_type}.weight.ark || exit 1; mv $megs_dir/${egs_type}.1.scp $megs_dir/${egs_type}.scp || exit 1; done -mv $megs_dir/info/egs.num_archives $megs_dir/info/num_archives || exit 1; -mv $megs_dir/info/egs.num_tasks $megs_dir/info/num_tasks || exit 1; +mv $megs_dir/info/${egs_prefix}num_archives $megs_dir/info/num_archives || exit 1; +mv $megs_dir/info/${egs_prefix}num_tasks $megs_dir/info/num_tasks || exit 1; echo "$0: Finished preparing multilingual training example." diff --git a/egs/wsj/s5/steps/subset_ali_dir.sh b/egs/wsj/s5/steps/subset_ali_dir.sh new file mode 100755 index 00000000000..c086ea39959 --- /dev/null +++ b/egs/wsj/s5/steps/subset_ali_dir.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0. + +cmd=run.pl + +. path.sh + +. 
utils/parse_options.sh + +if [ $# -ne 4 ]; then + cat < + e.g.: data/train data/train_sp exp/tri3_ali_sp exp/tri3_ali +EOF +fi + +subset_data=$1 +data=$2 +ali_dir=$3 +dir=$4 + +nj=$(cat $ali_dir/num_jobs) || exit 1 +utils/split_data.sh $data $nj + +mkdir -p $dir +cp $ali_dir/{final.mdl,*.mat,*_opts,tree} $dir/ || true +cp -r $ali_dir/phones $dir 2>/dev/null || true + +$cmd JOB=1:$nj $dir/log/copy_alignments.JOB.log \ + copy-int-vector "ark:gunzip -c $ali_dir/ali.JOB.gz |" \ + ark,scp:$dir/ali_tmp.JOB.ark,$dir/ali_tmp.JOB.scp || exit 1 + +for n in `seq $nj`; do + cat $dir/ali_tmp.$n.scp +done > $dir/ali_tmp.scp + +num_spk=$(cat $subset_data/spk2utt | wc -l) +if [ $num_spk -lt $nj ]; then + nj=$num_spk +fi + +utils/split_data.sh $subset_data $nj +$cmd JOB=1:$nj $dir/log/filter_alignments.JOB.log \ + copy-int-vector \ + "scp:utils/filter_scp.pl $subset_data/split${nj}/JOB/utt2spk $dir/ali_tmp.scp |" \ + "ark:| gzip -c > $dir/ali.JOB.gz" || exit 1 + +echo $nj > $dir/num_jobs + +rm $dir/ali_tmp.*.{ark,scp} $dir/ali_tmp.scp + +exit 0 diff --git a/src/chain/chain-supervision.cc b/src/chain/chain-supervision.cc index b5597b15667..fb0f0284df7 100644 --- a/src/chain/chain-supervision.cc +++ b/src/chain/chain-supervision.cc @@ -19,6 +19,7 @@ #include "chain/chain-supervision.h" #include "lat/lattice-functions.h" +#include "lat/push-lattice.h" #include "util/text-utils.h" #include "hmm/hmm-utils.h" #include @@ -142,9 +143,9 @@ bool ProtoSupervision::operator == (const ProtoSupervision &other) const { fst::Equal(fst, other.fst)); } -bool PhoneLatticeToProtoSupervision(const SupervisionOptions &opts, - const CompactLattice &lat, - ProtoSupervision *proto_supervision) { +bool PhoneLatticeToProtoSupervisionInternal(const SupervisionOptions &opts, + const CompactLattice &lat, + ProtoSupervision *proto_supervision) { opts.Check(); if (lat.NumStates() == 0) { KALDI_WARN << "Empty lattice provided"; @@ -176,9 +177,11 @@ bool PhoneLatticeToProtoSupervision(const SupervisionOptions &opts, return false; } proto_supervision->fst.AddArc(state, - fst::StdArc(phone, phone, - fst::TropicalWeight::One(), - lat_arc.nextstate)); + fst::StdArc(phone, phone, + fst::TropicalWeight( + lat_arc.weight.Weight().Value1() + * opts.lm_scale + opts.phone_ins_penalty), + lat_arc.nextstate)); int32 t_begin = std::max(0, (state_time - opts.left_tolerance)), t_end = std::min(num_frames, (next_state_time + opts.right_tolerance)), @@ -189,7 +192,8 @@ bool PhoneLatticeToProtoSupervision(const SupervisionOptions &opts, proto_supervision->allowed_phones[t_subsampled].push_back(phone); } if (lat.Final(state) != CompactLatticeWeight::Zero()) { - proto_supervision->fst.SetFinal(state, fst::TropicalWeight::One()); + proto_supervision->fst.SetFinal(state, fst::TropicalWeight( + lat.Final(state).Weight().Value1() * opts.lm_scale)); if (state_times[state] != num_frames) { KALDI_WARN << "Time of final state " << state << " in lattice is " << "not equal to number of frames " << num_frames @@ -207,6 +211,16 @@ bool PhoneLatticeToProtoSupervision(const SupervisionOptions &opts, return true; } +bool PhoneLatticeToProtoSupervision(const SupervisionOptions &opts, + const CompactLattice &lat, + ProtoSupervision *proto_supervision) { + if (!PhoneLatticeToProtoSupervisionInternal(opts, lat, proto_supervision)) + return false; + if (opts.lm_scale != 0.0) + fst::Push(&(proto_supervision->fst), + fst::REWEIGHT_TO_INITIAL, fst::kDelta, true); + return true; +} bool TimeEnforcerFst::GetArc(StateId s, Label ilabel, fst::StdArc* oarc) { // the following call will 
do the range-check on 'ilabel'. diff --git a/src/chain/chain-supervision.h b/src/chain/chain-supervision.h index a94f68ade90..ce755f0cb63 100644 --- a/src/chain/chain-supervision.h +++ b/src/chain/chain-supervision.h @@ -50,10 +50,16 @@ struct SupervisionOptions { int32 left_tolerance; int32 right_tolerance; int32 frame_subsampling_factor; + BaseFloat weight; + BaseFloat lm_scale; + BaseFloat phone_ins_penalty; SupervisionOptions(): left_tolerance(5), right_tolerance(5), - frame_subsampling_factor(1) { } + frame_subsampling_factor(1), + weight(1.0), + lm_scale(0.0), + phone_ins_penalty(0.0) { } void Register(OptionsItf *opts) { opts->Register("left-tolerance", &left_tolerance, "Left tolerance for " @@ -65,6 +71,13 @@ struct SupervisionOptions { "frame-rate of the original alignment. Applied after " "left-tolerance and right-tolerance are applied (so they are " "in terms of the original num-frames."); + opts->Register("weight", &weight, + "Use this to set the supervision weight for training"); + opts->Register("lm-scale", &lm_scale, "The scale with which the graph/lm " + "weights from the phone lattice are included in the " + "supervision fst."); + opts->Register("phone-ins-penalty", &phone_ins_penalty, + "The penalty to penalize longer paths"); } void Check() const; }; diff --git a/src/chain/language-model.cc b/src/chain/language-model.cc index 41e06116ea8..d2bb073d764 100644 --- a/src/chain/language-model.cc +++ b/src/chain/language-model.cc @@ -26,7 +26,8 @@ namespace kaldi { namespace chain { -void LanguageModelEstimator::AddCounts(const std::vector &sentence) { +void LanguageModelEstimator::AddCounts(const std::vector &sentence, + int32 weight) { KALDI_ASSERT(opts_.ngram_order >= 2 && "--ngram-order must be >= 2"); KALDI_ASSERT(opts_.ngram_order >= opts_.no_prune_ngram_order); int32 order = opts_.ngram_order; @@ -36,23 +37,23 @@ void LanguageModelEstimator::AddCounts(const std::vector &sentence) { end = sentence.end(); for (; iter != end; ++iter) { KALDI_ASSERT(*iter != 0); - IncrementCount(history, *iter); + IncrementCount(history, *iter, weight); history.push_back(*iter); if (history.size() >= order) history.erase(history.begin()); } // Probability of end of sentence. This will end up getting ignored later, but // it still makes a difference for probability-normalization reasons. - IncrementCount(history, 0); + IncrementCount(history, 0, weight); } void LanguageModelEstimator::IncrementCount(const std::vector &history, - int32 next_phone) { + int32 next_phone, int32 weight) { int32 lm_state_index = FindOrCreateLmStateIndexForHistory(history); if (lm_states_[lm_state_index].tot_count == 0) { num_active_lm_states_++; } - lm_states_[lm_state_index].AddCount(next_phone, 1); + lm_states_[lm_state_index].AddCount(next_phone, weight); } void LanguageModelEstimator::SetParentCounts() { diff --git a/src/chain/language-model.h b/src/chain/language-model.h index b2c3f4cd746..123d5ab830f 100644 --- a/src/chain/language-model.h +++ b/src/chain/language-model.h @@ -91,7 +91,7 @@ class LanguageModelEstimator { // Adds counts for this sentence. Basically does: for each n-gram in the // sentence, count[n-gram] += 1. The only constraint on 'sentence' is that it // should contain no zeros. - void AddCounts(const std::vector &sentence); + void AddCounts(const std::vector &sentence, int32 weight); // Estimates the LM and outputs it as an FST. Note: there is // no concept here of backoff arcs. @@ -188,7 +188,7 @@ class LanguageModelEstimator { // adds the counts for this ngram (called from AddCounts()). 
inline void IncrementCount(const std::vector &history, - int32 next_phone); + int32 next_phone, int32 weight); // Computes whether backoff should be allowed for this lm_state. (the caller diff --git a/src/chainbin/chain-est-phone-lm.cc b/src/chainbin/chain-est-phone-lm.cc index f16b3f4f14b..db16cc4d51a 100644 --- a/src/chainbin/chain-est-phone-lm.cc +++ b/src/chainbin/chain-est-phone-lm.cc @@ -39,31 +39,52 @@ int main(int argc, char *argv[]) { " chain-est-phone-lm --leftmost-context-questions=dir/leftmost_questions.txt ark:- dir/phone_G.fst\n"; bool binary_write = true; + std::string scales_str; + LanguageModelOptions lm_opts; ParseOptions po(usage); po.Register("binary", &binary_write, "Write output in binary mode"); + po.Register("scales", &scales_str, "Comma-separated list of scales " + "for the different sources of phone sequences"); lm_opts.Register(&po); po.Read(argc, argv); - if (po.NumArgs() != 2) { + if (po.NumArgs() < 2) { po.PrintUsage(); exit(1); } - std::string phone_seqs_rspecifier = po.GetArg(1), - lm_fst_wxfilename = po.GetArg(2); - + int32 num_sources = po.NumArgs() - 1; + + std::string lm_fst_wxfilename = po.GetArg(po.NumArgs()); + + std::vector scales(num_sources, 1); + if (!scales_str.empty()) { + std::vector parts; + SplitStringToVector(scales_str, ":,", false, &parts); + if (parts.size() != num_sources) { + KALDI_ERR << "--scales must have exactly num-sources = " + << num_sources << " scales."; + } + for (size_t i = 0; i < parts.size(); i++) { + scales[i] = std::atoi(parts[i].c_str()); + } + } LanguageModelEstimator lm_estimator(lm_opts); - SequentialInt32VectorReader phones_reader(phone_seqs_rspecifier); - KALDI_LOG << "Reading phone sequences"; - for (; !phones_reader.Done(); phones_reader.Next()) { - const std::vector &phone_seq = phones_reader.Value(); - lm_estimator.AddCounts(phone_seq); + for (int32 n = 1; n <= num_sources; n++) { + std::string phone_seqs_rspecifier = po.GetArg(n); + SequentialInt32VectorReader phones_reader(phone_seqs_rspecifier); + KALDI_LOG << "Reading phone sequences"; + for (; !phones_reader.Done(); phones_reader.Next()) { + const std::vector &phone_seq = phones_reader.Value(); + lm_estimator.AddCounts(phone_seq, scales[n-1]); + } } + KALDI_LOG << "Estimating phone LM"; fst::StdVectorFst fst; lm_estimator.Estimate(&fst); diff --git a/src/chainbin/nnet3-chain-copy-egs.cc b/src/chainbin/nnet3-chain-copy-egs.cc index 4f26e145ac5..c6f643bcae7 100644 --- a/src/chainbin/nnet3-chain-copy-egs.cc +++ b/src/chainbin/nnet3-chain-copy-egs.cc @@ -25,6 +25,40 @@ namespace kaldi { namespace nnet3 { +// rename name of NnetIo with old_name to new_name. +void RenameIoNames(const std::string &old_name, + const std::string &new_name, + NnetChainExample *eg_modified) { + // list of io-names in eg_modified. + std::vector orig_output_names; + int32 output_size = eg_modified->outputs.size(); + for (int32 output_ind = 0; output_ind < output_size; output_ind++) + orig_output_names.push_back(eg_modified->outputs[output_ind].name); + + // find the io in eg with name 'old_name'. + int32 rename_output_ind = + std::find(orig_output_names.begin(), orig_output_names.end(), old_name) - + orig_output_names.begin(); + + if (rename_output_ind >= output_size) + KALDI_ERR << "No io-node with name " << old_name + << "exists in eg."; + eg_modified->outputs[rename_output_ind].name = new_name; +} + +// ranames NnetIo name with name 'output' to new_output_name +// and scales the supervision for 'output' using weight. 
+void SetWeightAndRenameOutput(BaseFloat weight, + const std::string &new_output_name, + NnetChainExample *eg) { + // scale the supervision weight for egs + for (int32 i = 0; i < eg->outputs.size(); i++) + if (eg->outputs[i].name == "output") + if (weight != 0.0 && weight != 1.0) + eg->outputs[i].supervision.weight *= weight; + // rename output io name to 'new_output_name'. + RenameIoNames("output", new_output_name, eg); +} // returns an integer randomly drawn with expected value "expected_count" // (will be either floor(expected_count) or ceil(expected_count)). @@ -268,6 +302,8 @@ int main(int argc, char *argv[]) { int32 frame_subsampling_factor = -1; BaseFloat keep_proportion = 1.0; int32 left_context = -1, right_context = -1; + std::string eg_weight_rspecifier, eg_output_rspecifier; + ParseOptions po(usage); po.Register("random", &random, "If true, will write frames to output " "archives randomly, not round-robin."); @@ -285,6 +321,15 @@ int main(int argc, char *argv[]) { "feature left-context that we output."); po.Register("right-context", &right_context, "Can be used to truncate the " "feature right-context that we output."); + po.Register("weights", &eg_weight_rspecifier, + "Rspecifier indexed by the key of egs, providing a weight by " + "which we will scale the supervision matrix for that eg. " + "Used in multilingual training."); + po.Register("outputs", &eg_output_rspecifier, + "Rspecifier indexed by the key of egs, providing a string-valued " + "output name, e.g. 'output-0'. If provided, the NnetIo with " + "name 'output' will be renamed to the provided name. Used in " + "multilingual training."); po.Read(argc, argv); srand(srand_seed); @@ -297,6 +342,8 @@ int main(int argc, char *argv[]) { std::string examples_rspecifier = po.GetArg(1); SequentialNnetChainExampleReader example_reader(examples_rspecifier); + RandomAccessTokenReader output_reader(eg_output_rspecifier); + RandomAccessBaseFloatReader egs_weight_reader(eg_weight_rspecifier); int32 num_outputs = po.NumArgs() - 1; std::vector example_writers(num_outputs); @@ -307,8 +354,9 @@ int main(int argc, char *argv[]) { // not configurable for now. exclude_names.push_back(std::string("ivector")); - int64 num_read = 0, num_written = 0; - + int64 num_read = 0, num_written = 0, num_err = 0; + bool modify_eg_output = !(eg_output_rspecifier.empty() && + eg_weight_rspecifier.empty()); for (; !example_reader.Done(); example_reader.Next(), num_read++) { if (frame_subsampling_factor == -1) CalculateFrameSubsamplingFactor(example_reader.Value(), @@ -316,11 +364,41 @@ int main(int argc, char *argv[]) { // count is normally 1; could be 0, or possibly >1. int32 count = GetCount(keep_proportion); std::string key = example_reader.Key(); + NnetChainExample eg_modified_output; + const NnetChainExample &eg_orig = example_reader.Value(), + &eg = (modify_eg_output ? eg_modified_output : eg_orig); + // Note: in the normal case we just use 'eg'; eg_modified_output is + // for the case when the --outputs or --weights option is specified + // (only for multilingual training). + BaseFloat weight = 1.0; + std::string new_output_name; + if (modify_eg_output) { // This branch is only taken for multilingual training. 
+ eg_modified_output = eg_orig; + if (!eg_weight_rspecifier.empty()) { + if (!egs_weight_reader.HasKey(key)) { + KALDI_WARN << "No weight for example key " << key; + num_err++; + continue; + } + weight = egs_weight_reader.Value(key); + } + if (!eg_output_rspecifier.empty()) { + if (!output_reader.HasKey(key)) { + KALDI_WARN << "No new output-name for example key " << key; + num_err++; + continue; + } + new_output_name = output_reader.Value(key); + } + } if (frame_shift == 0 && left_context == -1 && right_context == -1) { - const NnetChainExample &eg = example_reader.Value(); for (int32 c = 0; c < count; c++) { int32 index = (random ? Rand() : num_written) % num_outputs; + if (modify_eg_output) // Only for multilingual training + SetWeightAndRenameOutput(weight, new_output_name, + &eg_modified_output); + example_writers[index]->Write(key, eg); num_written++; } @@ -336,6 +414,8 @@ int main(int argc, char *argv[]) { eg_out.Swap(&eg); for (int32 c = 0; c < count; c++) { int32 index = (random ? Rand() : num_written) % num_outputs; + if (modify_eg_output) + SetWeightAndRenameOutput(weight, new_output_name, &eg_out); example_writers[index]->Write(key, eg_out); num_written++; } diff --git a/src/chainbin/nnet3-chain-get-egs.cc b/src/chainbin/nnet3-chain-get-egs.cc index c8c251900ec..b644ba0aa01 100644 --- a/src/chainbin/nnet3-chain-get-egs.cc +++ b/src/chainbin/nnet3-chain-get-egs.cc @@ -43,6 +43,8 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, const MatrixBase *ivector_feats, int32 ivector_period, const chain::Supervision &supervision, + const VectorBase *deriv_weights, + int32 supervision_length_tolerance, const std::string &utt_id, bool compress, UtteranceSplitter *utt_splitter, @@ -51,7 +53,18 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, int32 num_input_frames = feats.NumRows(), num_output_frames = supervision.frames_per_sequence; - if (!utt_splitter->LengthsMatch(utt_id, num_input_frames, num_output_frames)) + int32 frame_subsampling_factor = utt_splitter->Config().frame_subsampling_factor; + + if (deriv_weights && (std::abs(deriv_weights->Dim() - num_output_frames) + > supervision_length_tolerance)) { + KALDI_WARN << "For utterance " << utt_id + << ", mismatch between deriv-weights dim and num-output-frames" + << "; " << deriv_weights->Dim() << " vs " << num_output_frames; + return false; + } + + if (!utt_splitter->LengthsMatch(utt_id, num_input_frames, num_output_frames, + supervision_length_tolerance)) return false; // LengthsMatch() will have printed a warning. std::vector chunks; @@ -65,8 +78,6 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, return false; } - int32 frame_subsampling_factor = utt_splitter->Config().frame_subsampling_factor; - chain::SupervisionSplitter sup_splitter(supervision); for (size_t c = 0; c < chunks.size(); c++) { @@ -92,19 +103,36 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, int32 first_frame = 0; // we shift the time-indexes of all these parts so // that the supervised part starts from frame 0. 
+ + NnetChainExample nnet_chain_eg; + nnet_chain_eg.outputs.resize(1); SubVector output_weights( &(chunk.output_weights[0]), static_cast(chunk.output_weights.size())); - NnetChainSupervision nnet_supervision("output", supervision_part, - output_weights, - first_frame, - frame_subsampling_factor); + if (!deriv_weights) { + NnetChainSupervision nnet_supervision("output", supervision_part, + output_weights, + first_frame, + frame_subsampling_factor); + nnet_chain_eg.outputs[0].Swap(&nnet_supervision); + } else { + Vector this_deriv_weights(num_frames_subsampled); + for (int32 i = 0; i < num_frames_subsampled; i++) { + int32 t = i + start_frame_subsampled; + if (t < deriv_weights->Dim()) + this_deriv_weights(i) = (*deriv_weights)(t); + } + KALDI_ASSERT(output_weights.Dim() == num_frames_subsampled); + this_deriv_weights.MulElements(output_weights); + NnetChainSupervision nnet_supervision("output", supervision_part, + this_deriv_weights, + first_frame, + frame_subsampling_factor); + nnet_chain_eg.outputs[0].Swap(&nnet_supervision); + } - NnetChainExample nnet_chain_eg; - nnet_chain_eg.outputs.resize(1); - nnet_chain_eg.outputs[0].Swap(&nnet_supervision); nnet_chain_eg.inputs.resize(ivector_feats != NULL ? 2 : 1); int32 tot_input_frames = chunk.left_context + chunk.num_frames + @@ -176,13 +204,15 @@ int main(int argc, char *argv[]) { "chain-get-supervision.\n"; bool compress = true; - int32 length_tolerance = 100, online_ivector_period = 1; + int32 length_tolerance = 100, online_ivector_period = 1, + supervision_length_tolerance = 1; ExampleGenerationConfig eg_config; // controls num-frames, // left/right-context, etc. + BaseFloat scale = 1.0; int32 srand_seed = 0; - std::string online_ivector_rspecifier; + std::string online_ivector_rspecifier, deriv_weights_rspecifier; ParseOptions po(usage); po.Register("compress", &compress, "If true, write egs with input features " @@ -200,6 +230,16 @@ int main(int argc, char *argv[]) { po.Register("srand", &srand_seed, "Seed for random number generator "); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); + po.Register("supervision-length-tolerance", &supervision_length_tolerance, "Tolerance for " + "difference in num-frames-subsampled between supervision and deriv weights"); + po.Register("deriv-weights-rspecifier", &deriv_weights_rspecifier, + "Per-frame weights (only binary - 0 or 1) that specifies " + "whether a frame's gradient must be backpropagated or not. 
" + "Not specifying this is equivalent to specifying a vector of " + "all 1s."); + po.Register("normalization-scale", &scale, "Scale the weights from the " + "'normalization' FST before applying them to the examples."); + eg_config.Register(&po); po.Read(argc, argv); @@ -235,6 +275,14 @@ int main(int argc, char *argv[]) { if (!normalization_fst_rxfilename.empty()) { ReadFstKaldi(normalization_fst_rxfilename, &normalization_fst); KALDI_ASSERT(normalization_fst.NumStates() > 0); + + if (scale <= 0.0) { + KALDI_ERR << "Invalid scale on normalization FST; must be > 0.0"; + } + + if (scale != 1.0) { + ScaleFst(scale, &normalization_fst); + } } // Read as GeneralMatrix so we don't need to un-compress and re-compress @@ -245,6 +293,8 @@ int main(int argc, char *argv[]) { NnetChainExampleWriter example_writer(examples_wspecifier); RandomAccessBaseFloatMatrixReader online_ivector_reader( online_ivector_rspecifier); + RandomAccessBaseFloatVectorReader deriv_weights_reader( + deriv_weights_rspecifier); int32 num_err = 0; @@ -278,10 +328,24 @@ int main(int argc, char *argv[]) { num_err++; continue; } + + const Vector *deriv_weights = NULL; + if (!deriv_weights_rspecifier.empty()) { + if (!deriv_weights_reader.HasKey(key)) { + KALDI_WARN << "No deriv weights for utterance " << key; + num_err++; + continue; + } else { + // this address will be valid until we call HasKey() or Value() + // again. + deriv_weights = &(deriv_weights_reader.Value(key)); + } + } if (!ProcessFile(normalization_fst, feats, online_ivector_feats, online_ivector_period, - supervision, key, compress, + supervision, deriv_weights, supervision_length_tolerance, + key, compress, &utt_splitter, &example_writer)) num_err++; } diff --git a/src/chainbin/nnet3-chain-normalize-egs.cc b/src/chainbin/nnet3-chain-normalize-egs.cc index 9d3f56f756a..139c08e7799 100644 --- a/src/chainbin/nnet3-chain-normalize-egs.cc +++ b/src/chainbin/nnet3-chain-normalize-egs.cc @@ -41,7 +41,11 @@ int main(int argc, char *argv[]) { "e.g.\n" "nnet3-chain-normalize-egs dir/normalization.fst ark:train_in.cegs ark:train_out.cegs\n"; + BaseFloat scale = 1.0; + ParseOptions po(usage); + po.Register("normalization-scale", &scale, "Scale the weights from the " + "'normalization' FST before applying them to the examples."); po.Read(argc, argv); @@ -57,6 +61,14 @@ int main(int argc, char *argv[]) { fst::StdVectorFst normalization_fst; ReadFstKaldi(normalization_fst_rxfilename, &normalization_fst); + if (scale <= 0.0) { + KALDI_ERR << "Invalid scale on normalization FST; must be > 0.0"; + } + + if (scale != 1.0) { + ScaleFst(scale, &normalization_fst); + } + SequentialNnetChainExampleReader example_reader(examples_rspecifier); NnetChainExampleWriter example_writer(examples_wspecifier); diff --git a/src/nnet3/nnet-chain-combine.cc b/src/nnet3/nnet-chain-combine.cc index c93858fb06e..67de2b843bb 100644 --- a/src/nnet3/nnet-chain-combine.cc +++ b/src/nnet3/nnet-chain-combine.cc @@ -503,18 +503,18 @@ double NnetChainCombiner::ComputeObjfAndDerivFromNnet( prob_computer_->Reset(); std::vector::const_iterator iter = egs_.begin(), end = egs_.end(); - for (; iter != end; ++iter) + for (; iter != end; ++iter) { prob_computer_->Compute(*iter); - const ChainObjectiveInfo *objf_info = - prob_computer_->GetObjective("output"); - if (objf_info == NULL) - KALDI_ERR << "Error getting objective info (unsuitable egs?)"; - KALDI_ASSERT(objf_info->tot_weight > 0.0); + } + + std::pair p = prob_computer_->GetTotalObjective(); + BaseFloat tot_objf = p.first, tot_weight = p.second; + 
KALDI_ASSERT(tot_weight > 0.0); const Nnet &deriv = prob_computer_->GetDeriv(); VectorizeNnet(deriv, nnet_params_deriv); // we prefer to deal with normalized objective functions. - nnet_params_deriv->Scale(1.0 / objf_info->tot_weight); - return (objf_info->tot_like + objf_info->tot_l2_term) / objf_info->tot_weight; + nnet_params_deriv->Scale(1.0 / tot_weight); + return tot_objf / tot_weight; } diff --git a/src/nnet3/nnet-chain-diagnostics.cc b/src/nnet3/nnet-chain-diagnostics.cc index 084b33347df..cd3d5894601 100644 --- a/src/nnet3/nnet-chain-diagnostics.cc +++ b/src/nnet3/nnet-chain-diagnostics.cc @@ -207,6 +207,26 @@ bool NnetChainComputeProb::PrintTotalStats() const { } +std::pair NnetChainComputeProb::GetTotalObjective() const { + unordered_map::const_iterator + iter, end; + iter = objf_info_.begin(); + end = objf_info_.end(); + BaseFloat tot_objf = 0.0, tot_weight = 0.0; + for (; iter != end; ++iter) { + const std::string &name = iter->first; + int32 node_index = nnet_.GetNodeIndex(name); + KALDI_ASSERT(node_index >= 0); + const ChainObjectiveInfo &info = iter->second; + BaseFloat like = (info.tot_like / info.tot_weight), + l2_term = (info.tot_l2_term / info.tot_weight); + tot_objf += like + l2_term; + tot_weight += info.tot_weight; + } + return std::make_pair(tot_objf, tot_weight); +} + + const ChainObjectiveInfo* NnetChainComputeProb::GetObjective( const std::string &output_name) const { unordered_map::const_iterator @@ -217,15 +237,29 @@ const ChainObjectiveInfo* NnetChainComputeProb::GetObjective( return NULL; } +static bool HasXentOutputs(const Nnet &nnet) { + const std::vector node_names = nnet.GetNodeNames(); + for (std::vector::const_iterator it = node_names.begin(); + it != node_names.end(); ++it) { + int32 node_index = nnet.GetNodeIndex(*it); + if (nnet.IsOutputNode(node_index) && + it->find("-xent") != std::string::npos) { + return true; + } + } + return false; +} + void RecomputeStats(const std::vector &egs, const chain::ChainTrainingOptions &chain_config_in, const fst::StdVectorFst &den_fst, Nnet *nnet) { KALDI_LOG << "Recomputing stats on nnet (affects batch-norm)"; chain::ChainTrainingOptions chain_config(chain_config_in); - if (nnet->GetNodeIndex("output-xent") != -1 && + if (HasXentOutputs(*nnet) && chain_config.xent_regularize == 0) { - // this forces it to compute the output for 'output-xent', which + // this forces it to compute the output for xent outputs, + // usually 'output-xent', which // means that we'll be computing batch-norm stats for any // components in that branch that have batch-norm. chain_config.xent_regularize = 0.1; diff --git a/src/nnet3/nnet-chain-diagnostics.h b/src/nnet3/nnet-chain-diagnostics.h index 4125427c463..b2962cf87d3 100644 --- a/src/nnet3/nnet-chain-diagnostics.h +++ b/src/nnet3/nnet-chain-diagnostics.h @@ -83,6 +83,9 @@ class NnetChainComputeProb { // or NULL if there is no such info. const ChainObjectiveInfo *GetObjective(const std::string &output_name) const; + // returns the total objective summed over all the outputs + std::pair GetTotalObjective() const; + // if config.compute_deriv == true, returns a reference to the // computed derivative. Otherwise crashes. 
const Nnet &GetDeriv() const; diff --git a/src/nnet3/nnet-chain-example.cc b/src/nnet3/nnet-chain-example.cc index 351312fb952..d40df1a79f9 100644 --- a/src/nnet3/nnet-chain-example.cc +++ b/src/nnet3/nnet-chain-example.cc @@ -31,8 +31,8 @@ void NnetChainSupervision::Write(std::ostream &os, bool binary) const { WriteToken(os, binary, name); WriteIndexVector(os, binary, indexes); supervision.Write(os, binary); - WriteToken(os, binary, ""); // for DerivWeights. Want to save space. - WriteVectorAsChar(os, binary, deriv_weights); + WriteToken(os, binary, ""); // for DerivWeights. Want to save space. + deriv_weights.Write(os, binary); WriteToken(os, binary, ""); } @@ -51,8 +51,11 @@ void NnetChainSupervision::Read(std::istream &is, bool binary) { ReadToken(is, binary, &token); // in the future this back-compatibility code can be reworked. if (token != "") { - KALDI_ASSERT(token == ""); - ReadVectorAsChar(is, binary, &deriv_weights); + KALDI_ASSERT(token == "" || token == ""); + if (token == "") + ReadVectorAsChar(is, binary, &deriv_weights); + else + deriv_weights.Read(is, binary); ExpectToken(is, binary, ""); } CheckDim(); @@ -82,8 +85,7 @@ void NnetChainSupervision::CheckDim() const { } if (deriv_weights.Dim() != 0) { KALDI_ASSERT(deriv_weights.Dim() == indexes.size()); - KALDI_ASSERT(deriv_weights.Min() >= 0.0 && - deriv_weights.Max() <= 1.0); + KALDI_ASSERT(deriv_weights.Min() >= 0.0); } } diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index 65df0c891c1..5a0eebd9e9a 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -1265,6 +1265,21 @@ void ExampleMerger::Finish() { stats_.PrintStats(); } +void ScaleFst(BaseFloat scale, fst::StdVectorFst *fst) { + typedef fst::StdArc Arc; + typedef Arc::StateId StateId; + typedef Arc::Weight Weight; + + for (StateId s = 0; s < fst->NumStates(); s++) { + for (fst::MutableArcIterator aiter(fst, s); + !aiter.Done(); aiter.Next()) { + Arc arc = aiter.Value(); + Weight weight(arc.weight.Value() * scale); + arc.weight = weight; + aiter.SetValue(arc); + } + } +} } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 02620df7485..3dcd90eb980 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -516,7 +516,7 @@ class ExampleMerger { MapType eg_to_egs_; }; - +void ScaleFst(BaseFloat scale, fst::StdVectorFst *fst); } // namespace nnet3 } // namespace kaldi
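
The new --normalization-scale options in nnet3-chain-get-egs and nnet3-chain-normalize-egs are implemented via the ScaleFst() helper declared above in nnet-example-utils.h. A minimal sketch of its effect, assuming a Kaldi build environment (the toy FST below is purely illustrative and not part of the patch):

#include "nnet3/nnet-example-utils.h"

int main() {
  using fst::StdArc;
  fst::StdVectorFst normalization_fst;
  StdArc::StateId s0 = normalization_fst.AddState(),
                  s1 = normalization_fst.AddState();
  normalization_fst.SetStart(s0);
  // One arc with cost 2.0 and a final cost of 0.5.
  normalization_fst.AddArc(s0, StdArc(1, 1, StdArc::Weight(2.0), s1));
  normalization_fst.SetFinal(s1, StdArc::Weight(0.5));

  // ScaleFst() multiplies every arc cost by the given scale; as written in
  // this patch it leaves final costs untouched.  After this call the arc
  // s0->s1 has cost 1.0 and the final cost at s1 is still 0.5.
  kaldi::nnet3::ScaleFst(0.5, &normalization_fst);
  return 0;
}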
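
With the lm-scale and phone-ins-penalty options added to SupervisionOptions above, the cost that PhoneLatticeToProtoSupervisionInternal() places on each proto-supervision arc and final state can be summarised by this simplified sketch (it ignores tolerances and frame subsampling; graph_cost stands for the lattice graph/LM cost, Value1()):

#include "chain/chain-supervision.h"

// Cost placed on a proto-supervision arc, per the change in chain-supervision.cc.
kaldi::BaseFloat ArcCost(kaldi::BaseFloat graph_cost,
                         const kaldi::chain::SupervisionOptions &opts) {
  return graph_cost * opts.lm_scale + opts.phone_ins_penalty;
}

// Cost placed on a final state of the proto-supervision FST.
kaldi::BaseFloat FinalCost(kaldi::BaseFloat graph_cost,
                           const kaldi::chain::SupervisionOptions &opts) {
  return graph_cost * opts.lm_scale;
}

// PhoneLatticeToProtoSupervision() then pushes weights to the initial state
// (fst::Push with REWEIGHT_TO_INITIAL) whenever lm_scale != 0.0, which
// redistributes the cost along paths without changing path totals.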