From 77494809783cb1979725293c568dfd9d76788bbb Mon Sep 17 00:00:00 2001
From: Vijayaditya Peddinti <vijayaditya.p@gmail.com>
Date: Mon, 27 Jul 2015 05:20:40 -0400
Subject: [PATCH] Added TDNN recipe for ami/sdm; corrected a bug in
 steps/nnet2/get_egs_discriminative2.sh

---
 egs/ami/s5/RESULTS_sdm                        |  46 +++++
 egs/ami/s5/conf/mfcc_hires.conf               |  10 +
 egs/ami/s5/conf/online_cmvn.conf              |   1 +
 egs/ami/s5/conf/queue_no_k20.conf             |  13 ++
 .../s5/local/online/run_nnet2_ms_perturbed.sh |  33 +++-
 .../s5/local/online/run_nnet2_ms_sp_disc.sh   | 175 ++++++++++++++++++
 egs/ami/s5/run_ihm.sh                         |  11 +-
 egs/ami/s5/run_sdm.sh                         |  16 +-
 .../s5/steps/nnet2/get_egs_discriminative2.sh |   4 +-
 9 files changed, 299 insertions(+), 10 deletions(-)
 create mode 100644 egs/ami/s5/conf/mfcc_hires.conf
 create mode 100644 egs/ami/s5/conf/online_cmvn.conf
 create mode 100644 egs/ami/s5/conf/queue_no_k20.conf
 create mode 100755 egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh

diff --git a/egs/ami/s5/RESULTS_sdm b/egs/ami/s5/RESULTS_sdm
index 919a5a02248..04fd3734669 100644
--- a/egs/ami/s5/RESULTS_sdm
+++ b/egs/ami/s5/RESULTS_sdm
@@ -17,3 +17,49 @@ exp/sdm1/tri2a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_13/eval_o4.ctm.filt.dtl:P
 exp/sdm1/tri3a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_12/eval_o4.ctm.filt.dtl:Percent Total Error       =   69.5%   (62576)
 exp/sdm1/tri3a_mmi_b0.1/decode_eval_3.mdl_ami_fsh.o3g.kn.pr1-7/ascore_10/eval_o4.ctm.filt.dtl:Percent Total Error       =   67.2%   (60447)
 
+
+
+# TDNN results
+for x in exp/$mic/nnet2_online/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep Sum $x/ascore_*/*.sys | utils/best_wer.sh; done
+# Cross entropy training
+%WER 46.8 | 15053 94502 | 59.3 27.6 13.0 6.2 46.8 67.0 | -23.602 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys
+%WER 46.4 | 14210 94496 | 59.0 26.6 14.4 5.4 46.4 70.7 | -23.844 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_dev_utt_offline/ascore_13/dev_hires_o4.ctm.filt.sys
+
+%WER 50.7 | 13180 89643 | 54.7 29.6 15.7 5.3 50.7 72.6 | -23.104 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys
+%WER 50.5 | 13099 89806 | 54.7 29.3 15.9 5.2 50.5 73.5 | -23.149 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys
+
+
+# sMBR training
+# dev set
+  # epoch 0
+  %WER 46.8 | 15053 94502 | 59.3 27.6 13.0 6.2 46.8 67.0 | -23.602 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys
+  %WER 46.4 | 14210 94496 | 59.0 26.6 14.4 5.4 46.4 70.7 | -23.844 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_dev_utt_offline/ascore_13/dev_hires_o4.ctm.filt.sys
+  #epoch 1
+  %WER 45.7 | 14207 94490 | 59.5 22.9 17.6 5.3 45.7 70.5 | -24.681 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_dev_utt/ascore_11/dev_hires_o4.ctm.filt.sys
+  %WER 45.9 | 15232 94491 | 59.9 23.2 17.0 5.7 45.9 65.9 | -24.541 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_dev_utt_offline/ascore_10/dev_hires_o4.ctm.filt.sys
+  #epoch 2
+  %WER 45.9 | 14543 94497 | 59.3 22.5 18.2 5.3 45.9 68.8 | -24.748 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys
+  %WER 46.1 | 14125 94492 | 59.6 22.7 17.7 5.7 46.1 71.1 | -24.626 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_dev_utt_offline/ascore_11/dev_hires_o4.ctm.filt.sys
+  #epoch 3
+  %WER 46.0 | 15128 94502 | 59.6 23.1 17.3 5.6 46.0 66.2 | -24.565 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys
+  %WER 46.2 | 14764 94498 | 59.3 22.3 18.4 5.5 46.2 68.0 | -24.723 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_dev_utt_offline/ascore_12/dev_hires_o4.ctm.filt.sys
+  #epoch 4
+  %WER 46.1 | 15193 94485 | 58.5 21.4 20.1 4.6 46.1 65.8 | -25.114 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_dev_utt/ascore_15/dev_hires_o4.ctm.filt.sys
+  %WER 46.5 | 15169 94494 | 59.2 22.8 18.0 5.7 46.5 66.3 | -24.554 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_dev_utt_offline/ascore_12/dev_hires_o4.ctm.filt.sys
+
+# eval set
+  #epoch 0
+  %WER 50.7 | 13180 89643 | 54.7 29.6 15.7 5.3 50.7 72.6 | -23.104 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys
+  %WER 50.5 | 13099 89806 | 54.7 29.3 15.9 5.2 50.5 73.5 | -23.149 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys
+  #epoch 1
+  %WER 49.3 | 13432 89977 | 55.4 25.2 19.4 4.7 49.3 70.7 | -23.885 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys
+  %WER 49.2 | 13497 89975 | 55.5 24.9 19.5 4.7 49.2 70.7 | -23.937 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys
+  #epoch 2
+  %WER 49.2 | 13372 89987 | 55.6 25.3 19.0 4.9 49.2 71.0 | -23.850 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_eval_utt/ascore_13/eval_hires_o4.ctm.filt.sys
+  %WER 48.9 | 13318 89796 | 55.9 25.2 18.9 4.8 48.9 71.3 | -23.901 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_eval_utt_offline/ascore_13/eval_hires_o4.ctm.filt.sys
+  #epoch 3
+  %WER 49.0 | 14307 89984 | 55.7 25.3 19.0 4.8 49.0 66.3 | -23.885 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_eval_utt/ascore_14/eval_hires_o4.ctm.filt.sys
+  %WER 48.9 | 14084 89798 | 55.9 25.3 18.8 4.8 48.9 67.4 | -23.884 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_eval_utt_offline/ascore_14/eval_hires_o4.ctm.filt.sys
+  #epoch 4
+  %WER 49.1 | 13948 89977 | 55.6 25.2 19.2 4.8 49.1 68.2 | -23.902 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_eval_utt/ascore_15/eval_hires_o4.ctm.filt.sys
+  %WER 49.0 | 14259 89798 | 55.8 25.4 18.8 4.8 49.0 66.6 | -23.873 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_eval_utt_offline/ascore_15/eval_hires_o4.ctm.filt.sys
diff --git a/egs/ami/s5/conf/mfcc_hires.conf b/egs/ami/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..434834a6725
--- /dev/null
+++ b/egs/ami/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why 
+# we prefer this method.
+--use-energy=false   # use average of log energy, not energy.
+--num-mel-bins=40     # similar to Google's setup.
+--num-ceps=40     # there is no dimensionality reduction.
+--low-freq=20     # low cutoff frequency for mel bins... this is high-bandwidth data, so
+                  # there might be some information at the low end.
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/ami/s5/conf/online_cmvn.conf b/egs/ami/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/ami/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/ami/s5/conf/queue_no_k20.conf b/egs/ami/s5/conf/queue_no_k20.conf
new file mode 100644
index 00000000000..f0cba4df971
--- /dev/null
+++ b/egs/ami/s5/conf/queue_no_k20.conf
@@ -0,0 +1,13 @@
+# Default configuration
+command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
+option mem=* -l mem_free=$0,ram_free=$0
+option mem=0          # Do not add anything to qsub_opts
+option num_threads=* -pe smp $0
+option num_threads=1  # Do not add anything to qsub_opts
+option max_jobs_run=* -tc $0
+default gpu=0
+option gpu=0 -q all.q
+option gpu=* -l gpu=$0 -q g.q
+default allow_k20=true
+option allow_k20=true
+option allow_k20=false -l 'hostname=!g01*&!g02*&!b06*'
diff --git a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
index 55d0380b5a8..4f6212f4b42 100755
--- a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
+++ b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
@@ -18,7 +18,9 @@ has_fisher=true
 mic=ihm
 nj=70
 affix=
+hidden_dim=950
 num_threads_ubm=32
+use_sat_alignments=true
 . ./path.sh
 . ./utils/parse_options.sh
 
@@ -33,6 +35,16 @@ EOF
   parallel_opts="--gpu 1" 
   num_threads=1
   minibatch_size=512
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
+    parallel_opts="$parallel_opts --config conf/queue_no_k20.conf --allow-k20 false"
+    # that config is like the default config in the text of queue.pl, but adding the following lines.
+    # default allow_k20=true
+    # option allow_k20=true
+    # option allow_k20=false -l 'hostname=!g01*&!g02*&!b06*'
+    # It's a workaround for an NVidia CUDA library bug for our currently installed version
+    # of the CUDA toolkit, that only shows up on k20's
+  fi
+
   # the _a is in case I want to change the parameters.
 else
   # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
@@ -43,12 +55,21 @@ else
 fi
 
 dir=exp/$mic/nnet2_online/nnet_ms_sp${affix:+_$affix}
+
+if [ "$use_sat_alignments" == "true" ] ; then
+  gmm_dir=exp/$mic/tri4a
+  align_script=steps/align_fmllr.sh
+else
+  gmm_dir=exp/$mic/tri3a
+  align_script=steps/align_si.sh
+fi
 final_lm=`cat data/local/lm/final_lm`
 LM=$final_lm.pr1-7
-graph_dir=exp/$mic/tri4a/graph_${LM}
+graph_dir=$gmm_dir/graph_${LM}
 
 # Run the common stages of training, including training the iVector extractor
 local/online/run_nnet2_common.sh --stage $stage --mic $mic \
+  --use-sat-alignments $use_sat_alignments \
   --num-threads-ubm $num_threads_ubm|| exit 1;
 
 if [ $stage -le 6 ]; then
@@ -70,8 +91,8 @@ if [ $stage -le 6 ]; then
 fi
 
 if [ $stage -le 7 ]; then
-  steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
-    data/$mic/train_sp data/lang exp/$mic/tri4a exp/$mic/tri4a_sp_ali || exit 1
+  $align_script --nj $nj --cmd "$train_cmd" \
+    data/$mic/train_sp data/lang $gmm_dir ${gmm_dir}_sp_ali || exit 1
 fi
 
 if [ $stage -le 8 ]; then
@@ -118,9 +139,9 @@ if [ $stage -le 10 ]; then
     --initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \
     --cmd "$decode_cmd" \
     --egs-dir "$common_egs_dir" \
-    --pnorm-input-dim 950 \
-    --pnorm-output-dim 950 \
-    data/$mic/train_hires_sp data/lang exp/$mic/tri4a_sp_ali $dir  || exit 1;
+    --pnorm-input-dim $hidden_dim \
+    --pnorm-output-dim $hidden_dim \
+    data/$mic/train_hires_sp data/lang ${gmm_dir}_sp_ali $dir  || exit 1;
 fi
 
 if [ $stage -le 11 ]; then
diff --git a/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh b/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh
new file mode 100755
index 00000000000..c80edeb7dbe
--- /dev/null
+++ b/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+
+
+# This script does discriminative training on top of the online, multi-splice
+# system trained in run_nnet2_ms.sh.
+# note: this relies on having a cluster that has plenty of CPUs as well as GPUs,
+# since the lattice generation runs in about real-time, so takes of the order of
+# 1000 hours of CPU time.
+# 
+# Note: rather than using any features we have dumped on disk, this script
+# regenerates them from the wav data three times-- when we do lattice
+# generation, numerator alignment and discriminative training.  This made the
+# script easier to write and more generic, because we don't have to know where
+# the features and the iVectors are, but of course it's a little inefficient.
+# The time taken is dominated by the lattice generation anyway, so this isn't
+# a huge deal.
+
+. cmd.sh
+
+# Configuration defaults.  Callers override them with --option-name value
+# (e.g. --mic, --gmm-dir, --srcdir); utils/parse_options.sh is sourced below.
+stage=0
+use_gpu=true
+criterion=smbr
+drop_frames=false  # only matters for MMI anyway.
+effective_lrate=0.000005
+srcdir=   # if empty, defaults to exp/$mic/nnet2_online/nnet_ms_sp (set further below).
+mic=ihm
+num_jobs_nnet=6
+train_stage=-10 # can be used to start training in the middle.
+decode_start_epoch=0 # can be used to avoid decoding all epochs, e.g. if we decided to run more.
+num_epochs=4
+cleanup=false  # run with --cleanup true --stage 6 to clean up (remove large things like denlats,
+               # alignments and degs).
+gmm_dir=  # if empty, defaults to exp/$mic/tri4a; resolved after option parsing so that --mic is honoured.
+
+set -e
+. ./path.sh
+. ./utils/parse_options.sh
+if [ -z "$gmm_dir" ]; then gmm_dir=exp/$mic/tri4a; fi
+
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat <<EOF && exit 1 
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA 
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.  Otherwise, call this script with --use-gpu false
+EOF
+  fi
+  parallel_opts=" -l gpu=1,hostname='!g01*&!g02*' " #we want to submit to all.q as we use multiple GPUs for this 
+  num_threads=1
+else
+  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
+  # almost the same, but this may be a little bit slow.
+  num_threads=16
+  parallel_opts="-pe smp $num_threads" 
+fi
+
+if [ -z $srcdir ]; then
+  srcdir=exp/$mic/nnet2_online/nnet_ms_sp
+fi
+
+if [ ! -f ${srcdir}_online/final.mdl ]; then
+  echo "$0: expected ${srcdir}_online/final.mdl to exist; first run run_nnet2_ms.sh."
+  exit 1;
+fi
+
+final_lm=`cat data/local/lm/final_lm`
+LM=$final_lm.pr1-7
+graph_dir=$gmm_dir/graph_${LM}
+
+if [ $stage -le 1 ]; then
+  nj=50  # this doesn't really affect anything strongly, except the num-jobs for one of
+         # the phases of get_egs_discriminative2.sh below.
+  num_threads_denlats=6
+  subsplit=40 # number of jobs that run per job (but 2 run at a time, so total jobs is 80, giving
+              # max total slots = 80 * 6 = 480).
+  steps/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads_denlats" \
+      --online-ivector-dir exp/$mic/nnet2_online/ivectors_train_hires_sp2 \
+      --nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats" --config conf/decode.conf \
+     data/$mic/train_hires_sp data/lang $srcdir ${srcdir}_denlats || exit 1;
+
+fi
+
+if [ $stage -le 2 ]; then
+  # hardcode no-GPU for alignment, although you could use GPU [you wouldn't
+  # get excellent GPU utilization though.]
+  nj=76 # have a high number of jobs because this could take a while, and we might
+         # have some stragglers.
+  use_gpu=no
+  gpu_opts=
+
+  steps/nnet2/align.sh  --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
+     --online-ivector-dir exp/$mic/nnet2_online/ivectors_train_hires_sp2 \
+     --nj $nj data/$mic/train_hires_sp data/lang $srcdir ${srcdir}_ali || exit 1;
+
+  # the command below is a more generic, but slower, way to do it.
+  # steps/online/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
+  #    --nj $nj data/train_hires data/lang ${srcdir}_online ${srcdir}_ali || exit 1;
+fi
+
+
+if [ $stage -le 3 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${srcdir}_degs/storage ]; then
+    utils/create_split_dir.pl \
+     /export/b0{1,2,5,6}/$USER/kaldi-data/egs/ami-${mic}-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage
+  fi
+  # have a higher maximum num-jobs if the ${srcdir}_degs/storage directory exists.
+  if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi
+
+  steps/nnet2/get_egs_discriminative2.sh \
+    --stage 0 \
+    --cmd "$decode_cmd -tc $max_jobs" \
+    --online-ivector-dir exp/$mic/nnet2_online/ivectors_train_hires_sp2 \
+    --criterion $criterion --drop-frames $drop_frames \
+     data/$mic/train_hires_sp data/lang ${srcdir}{_ali,_denlats,/final.mdl,_degs} || exit 1;
+
+  # the command below is a more generic, but slower, way to do it.
+  #steps/online/nnet2/get_egs_discriminative2.sh \
+  #  --cmd "$decode_cmd -tc $max_jobs" \
+  #  --criterion $criterion --drop-frames $drop_frames \
+  #   data/train_hires data/lang ${srcdir}{_ali,_denlats,_online,_degs} || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+  # Discriminative training on the egs in ${srcdir}_degs.  The number of parallel
+  # jobs comes from --num-jobs-nnet (default 6) instead of being hard-coded.
+  steps/nnet2/train_discriminative2.sh --cmd "$decode_cmd $parallel_opts" \
+    --stage $train_stage --effective-lrate $effective_lrate \
+    --criterion $criterion --drop-frames $drop_frames --num-epochs $num_epochs \
+    --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
+      ${srcdir}_degs ${srcdir}_${criterion}_${effective_lrate} || exit 1;
+fi
+
+if [ $stage -le 5 ]; then
+  dir=${srcdir}_${criterion}_${effective_lrate}
+  ln -sf $(readlink -f ${srcdir}_online/conf) $dir/conf # so it acts like an online-decoding directory
+
+  # Decode dev and eval with the model from each epoch, in online mode (decode_*_utt)
+  # and with "--online false" (decode_*_utt_offline).  All decodes run in parallel
+  # in the background; their PIDs are recorded so that failures can be detected.
+  decode_pids=
+  for suffix in "" _offline; do
+    online_opts=
+    if [ "$suffix" == "_offline" ]; then online_opts="--online false"; fi
+    for epoch in $(seq $decode_start_epoch $num_epochs); do
+      for decode_set in dev eval; do
+        (
+          # use as many jobs as there are distinct values in field 2 of utt2spk.
+          num_jobs=`cat data/$mic/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
+          decode_dir=$dir/decode_epoch${epoch}_${decode_set}_utt$suffix
+          steps/online/nnet2/decode.sh --config conf/decode.conf --cmd "$decode_cmd" --nj $num_jobs \
+            --per-utt true $online_opts --iter epoch$epoch $graph_dir data/$mic/${decode_set}_hires $decode_dir || exit 1
+        ) &
+        decode_pids="$decode_pids $!"
+      done
+    done
+  done
+
+  # a bare "wait" always returns 0, so wait on each job to propagate decode failures.
+  decode_failed=false
+  for pid in $decode_pids; do wait $pid || decode_failed=true; done
+  if $decode_failed; then echo "$0: one or more decoding jobs failed."; exit 1; fi
+fi
+
+if [ $stage -le 6 ] && $cleanup; then
+  # if you run with "--cleanup true --stage 6" you can clean up.
+  rm ${srcdir}_denlats/lat.*.gz || true
+  rm ${srcdir}_ali/ali.*.gz || true
+  steps/nnet2/remove_egs.sh ${srcdir}_degs || true
+fi
+
+
+exit 0;
diff --git a/egs/ami/s5/run_ihm.sh b/egs/ami/s5/run_ihm.sh
index be047f8f369..4590ba1deb8 100755
--- a/egs/ami/s5/run_ihm.sh
+++ b/egs/ami/s5/run_ihm.sh
@@ -174,7 +174,16 @@ fi
 
 # TDNN training.
 if [ $stage -le 13 ]; then
-  local/online/run_nnet2_ms_perturbed.sh --mic $mic
+  local/online/run_nnet2_ms_perturbed.sh \
+    --mic $mic \
+    --hidden-dim 950 \
+    --splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" \
+    --use-sat-alignments true
+  
+  local/online/run_nnet2_ms_sp_disc.sh  \
+    --mic $mic  \
+    --gmm-dir exp/$mic/tri4a \
+    --srcdir exp/$mic/nnet2_online/nnet_ms_sp
 fi
 
 echo "Done!"
diff --git a/egs/ami/s5/run_sdm.sh b/egs/ami/s5/run_sdm.sh
index e7fbe19e15a..3ae7e2c67df 100755
--- a/egs/ami/s5/run_sdm.sh
+++ b/egs/ami/s5/run_sdm.sh
@@ -38,7 +38,6 @@ if [ $stage -le 2 ]; then
   local/ami_sdm_scoring_data_prep.sh $AMI_DIR $micid dev
   local/ami_sdm_scoring_data_prep.sh $AMI_DIR $micid eval
 fi
-
 # Here starts the normal recipe, which is mostly shared across mic scenarios,
 # - for ihm we adapt to speaker by fMLLR,
 # - for sdm and mdm we do not adapt for speaker, but for environment only (cmn),
@@ -155,7 +154,20 @@ if [ $stage -le 12 ]; then
   local/nnet/run_dnn_lda_mllt.sh $mic
 fi
 
-echo "Done!"
+# TDNN training.
+if [ $stage -le 13 ]; then
+  local/online/run_nnet2_ms_perturbed.sh \
+    --mic $mic \
+    --hidden-dim 850 \
+    --splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" \
+    --use-sat-alignments false
+  
+  local/online/run_nnet2_ms_sp_disc.sh  \
+    --mic $mic  \
+    --gmm-dir exp/$mic/tri3a \
+    --srcdir exp/$mic/nnet2_online/nnet_ms_sp
+fi
+echo "Done."
 
 
 # By default we do not build systems adapted to sessions for AMI in distant scnearios 
diff --git a/egs/wsj/s5/steps/nnet2/get_egs_discriminative2.sh b/egs/wsj/s5/steps/nnet2/get_egs_discriminative2.sh
index 0c20232739d..c932e0463cc 100755
--- a/egs/wsj/s5/steps/nnet2/get_egs_discriminative2.sh
+++ b/egs/wsj/s5/steps/nnet2/get_egs_discriminative2.sh
@@ -91,8 +91,10 @@ utils/split_data.sh $data $nj
 
 if [ $nj_ali -eq $nj ]; then
   ali_rspecifier="ark,s,cs:gunzip -c $alidir/ali.JOB.gz |"
+  prior_ali_rspecifier="ark,s,cs:gunzip -c $alidir/ali.JOB.gz | copy-int-vector ark:- ark,t:- | utils/filter_scp.pl $dir/priors_uttlist | ali-to-pdf $alidir/final.mdl ark,t:- ark:- |"
 else
   ali_rspecifier="scp:$dir/ali.scp"
+  prior_ali_rspecifier="ark,s,cs:utils/filter_scp.pl $dir/priors_uttlist $dir/ali.scp | ali-to-pdf $alidir/final.mdl scp:- ark:- |"
   if [ $stage -le 1 ]; then
     echo "$0: number of jobs in den-lats versus alignments differ: dumping them as single archive and index."
     all_ids=$(seq -s, $nj_ali)
@@ -266,7 +268,7 @@ echo "$0: dumping egs for prior adjustment in the background."
 
 $cmd JOB=1:$nj $dir/log/create_priors_subset.JOB.log \
   nnet-get-egs $ivectors_opt $nnet_context_opts "$priors_feats" \
-  "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | copy-int-vector ark:- ark,t:- | utils/filter_scp.pl $dir/priors_uttlist | ali-to-pdf $alidir/final.mdl ark,t:- ark:- | ali-to-post ark:- ark:- |" \
+  "$prior_ali_rspecifier ali-to-post ark:- ark:- |" \
   ark:- \| nnet-copy-egs ark:- $priors_egs_list || \
   { touch $dir/.error; echo "Error in creating priors subset. See $dir/log/create_priors_subset.*.log"; exit 1; }